1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 #if defined(PETSC_HAVE_ELEMENTAL) 23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 24 #endif 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 28 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 29 { 30 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 35 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 36 PetscFunctionReturn(0); 37 } 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 53 { \ 54 \ 55 brow = row/bs; \ 56 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 57 rmax = aimax[brow]; nrow = ailen[brow]; \ 58 bcol = col/bs; \ 59 ridx = row % bs; cidx = col % bs; \ 60 low = 0; high = nrow; \ 61 while (high-low > 3) { \ 62 t = (low+high)/2; \ 63 if (rp[t] > bcol) high = t; \ 64 else low = t; \ 65 } \ 66 for (_i=low; _i<high; _i++) { \ 67 if (rp[_i] > bcol) break; \ 68 if (rp[_i] == bcol) { \ 69 bap = ap + bs2*_i + bs*cidx + ridx; \ 70 if (addv == ADD_VALUES) *bap += value; \ 71 else *bap = value; \ 72 goto a_noinsert; \ 73 } \ 74 } \ 75 if (a->nonew == 1) goto a_noinsert; \ 76 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 77 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 78 N = nrow++ - 1; \ 79 /* shift up all the later entries in this row */ \ 80 for (ii=N; ii>=_i; ii--) { \ 81 rp[ii+1] = rp[ii]; \ 82 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 83 } \ 84 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 85 rp[_i] = bcol; \ 86 ap[bs2*_i + bs*cidx + ridx] = value; \ 87 A->nonzerostate++;\ 88 a_noinsert:; \ 89 ailen[brow] = nrow; \ 90 } 91 92 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 93 { \ 94 brow = row/bs; \ 95 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 96 rmax = bimax[brow]; nrow = bilen[brow]; \ 97 bcol = col/bs; \ 98 ridx = row % bs; cidx = col % bs; \ 99 low = 0; high = nrow; \ 100 while (high-low > 3) { \ 101 t = (low+high)/2; \ 102 if (rp[t] > bcol) high = t; \ 103 else low = t; \ 104 } \ 105 for (_i=low; _i<high; _i++) { \ 106 if (rp[_i] > bcol) break; \ 107 if (rp[_i] == bcol) { \ 108 bap = ap + bs2*_i + bs*cidx + ridx; \ 109 if (addv == ADD_VALUES) *bap += value; \ 110 else *bap = value; \ 111 goto b_noinsert; \ 112 } \ 113 } \ 114 if (b->nonew == 1) goto b_noinsert; \ 115 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 116 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 117 N = nrow++ - 1; \ 118 /* shift up all the later entries in this row */ \ 119 for (ii=N; ii>=_i; ii--) { \ 120 rp[ii+1] = rp[ii]; \ 121 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 } \ 123 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 124 rp[_i] = bcol; \ 125 ap[bs2*_i + bs*cidx + ridx] = value; \ 126 B->nonzerostate++;\ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa =a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba =b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = NULL; 164 MatScalar *v_loc = NULL; 165 166 PetscFunctionBegin; 167 if (!baij->donotstash) { 168 if (n > baij->n_loc) { 169 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 170 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 171 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 173 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 192 } 193 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 194 col = in[j] - cstart_orig; /* local col index */ 195 brow = row/bs; bcol = col/bs; 196 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 197 if (roworiented) value = v[i*n+j]; 198 else value = v[i+j*m]; 199 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 200 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 201 } else if (in[j] < 0) continue; 202 #if defined(PETSC_USE_DEBUG) 203 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 204 #endif 205 else { /* off-diag entry (B) */ 206 if (mat->was_assembled) { 207 if (!baij->colmap) { 208 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 209 } 210 #if defined(PETSC_USE_CTABLE) 211 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 212 col = col - 1; 213 #else 214 col = baij->colmap[in[j]/bs] - 1; 215 #endif 216 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 217 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 218 col = in[j]; 219 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 220 B = baij->B; 221 b = (Mat_SeqBAIJ*)(B)->data; 222 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 223 ba = b->a; 224 } else col += in[j]%bs; 225 } else col = in[j]; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 mat->assembled = PETSC_FALSE; 236 n_loc = 0; 237 for (j=0; j<n; j++) { 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray =baij->barray; 261 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if (!barray) { 269 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (im[i] > in[j]) { 287 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 288 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 289 } 290 /* If NumCol = 1 then a copy is not required */ 291 if ((roworiented) && (n == 1)) { 292 barray = (MatScalar*) v + i*bs2; 293 } else if ((!roworiented) && (m == 1)) { 294 barray = (MatScalar*) v + j*bs2; 295 } else { /* Here a copy is required */ 296 if (roworiented) { 297 value = v + i*(stepval+bs)*bs + j*bs; 298 } else { 299 value = v + j*(stepval+bs)*bs + i*bs; 300 } 301 for (ii=0; ii<bs; ii++,value+=stepval) { 302 for (jj=0; jj<bs; jj++) { 303 *barray++ = *value++; 304 } 305 } 306 barray -=bs2; 307 } 308 309 if (in[j] >= cstart && in[j] < cend) { 310 col = in[j] - cstart; 311 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 312 } else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined(PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined(PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend) { 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined(PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data--; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 399 } 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatNorm_MPISBAIJ" 405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 406 { 407 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscReal sum[2],*lnorm2; 410 411 PetscFunctionBegin; 412 if (baij->size == 1) { 413 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 414 } else { 415 if (type == NORM_FROBENIUS) { 416 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 417 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 418 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 419 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 421 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 422 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 423 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 424 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 425 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 426 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 427 PetscReal *rsum,*rsum2,vabs; 428 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 429 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 430 MatScalar *v; 431 432 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 433 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 434 /* Amat */ 435 v = amat->a; jj = amat->j; 436 for (brow=0; brow<mbs; brow++) { 437 grow = bs*(rstart + brow); 438 nz = amat->i[brow+1] - amat->i[brow]; 439 for (bcol=0; bcol<nz; bcol++) { 440 gcol = bs*(rstart + *jj); jj++; 441 for (col=0; col<bs; col++) { 442 for (row=0; row<bs; row++) { 443 vabs = PetscAbsScalar(*v); v++; 444 rsum[gcol+col] += vabs; 445 /* non-diagonal block */ 446 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 /* Bmat */ 452 v = bmat->a; jj = bmat->j; 453 for (brow=0; brow<mbs; brow++) { 454 grow = bs*(rstart + brow); 455 nz = bmat->i[brow+1] - bmat->i[brow]; 456 for (bcol=0; bcol<nz; bcol++) { 457 gcol = bs*garray[*jj]; jj++; 458 for (col=0; col<bs; col++) { 459 for (row=0; row<bs; row++) { 460 vabs = PetscAbsScalar(*v); v++; 461 rsum[gcol+col] += vabs; 462 rsum[grow+row] += vabs; 463 } 464 } 465 } 466 } 467 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 468 *norm = 0.0; 469 for (col=0; col<mat->cmap->N; col++) { 470 if (rsum2[col] > *norm) *norm = rsum2[col]; 471 } 472 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 473 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 483 PetscErrorCode ierr; 484 PetscInt nstash,reallocs; 485 InsertMode addv; 486 487 PetscFunctionBegin; 488 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 489 490 /* make sure all processors are either in INSERTMODE or ADDMODE */ 491 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 492 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 493 mat->insertmode = addv; /* in case this processor had no cache */ 494 495 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 496 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 497 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 498 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 499 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 500 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 507 { 508 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 509 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 510 PetscErrorCode ierr; 511 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 512 PetscInt *row,*col; 513 PetscBool other_disassembled; 514 PetscMPIInt n; 515 PetscBool r1,r2,r3; 516 MatScalar *val; 517 InsertMode addv = mat->insertmode; 518 519 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 520 PetscFunctionBegin; 521 if (!baij->donotstash && !mat->nooffprocentries) { 522 while (1) { 523 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 524 if (!flg) break; 525 526 for (i=0; i<n;) { 527 /* Now identify the consecutive vals belonging to the same row */ 528 for (j=i,rstart=row[j]; j<n; j++) { 529 if (row[j] != rstart) break; 530 } 531 if (j < n) ncols = j-i; 532 else ncols = n-i; 533 /* Now assemble all these values with a single function call */ 534 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 535 i = j; 536 } 537 } 538 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 539 /* Now process the block-stash. Since the values are stashed column-oriented, 540 set the roworiented flag to column oriented, and after MatSetValues() 541 restore the original flags */ 542 r1 = baij->roworiented; 543 r2 = a->roworiented; 544 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 545 546 baij->roworiented = PETSC_FALSE; 547 a->roworiented = PETSC_FALSE; 548 549 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 550 while (1) { 551 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 552 if (!flg) break; 553 554 for (i=0; i<n;) { 555 /* Now identify the consecutive vals belonging to the same row */ 556 for (j=i,rstart=row[j]; j<n; j++) { 557 if (row[j] != rstart) break; 558 } 559 if (j < n) ncols = j-i; 560 else ncols = n-i; 561 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 562 i = j; 563 } 564 } 565 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 566 567 baij->roworiented = r1; 568 a->roworiented = r2; 569 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 593 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 594 595 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 596 597 baij->rowvalues = 0; 598 599 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 600 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 601 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 602 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 609 #include <petscdraw.h> 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 613 { 614 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt bs = mat->rmap->bs; 617 PetscMPIInt rank = baij->rank; 618 PetscBool iascii,isdraw; 619 PetscViewer sviewer; 620 PetscViewerFormat format; 621 622 PetscFunctionBegin; 623 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 624 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 625 if (iascii) { 626 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 627 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 628 MatInfo info; 629 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 630 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 633 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 634 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 635 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 636 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 637 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 638 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 639 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 640 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } else if (format == PETSC_VIEWER_ASCII_INFO) { 643 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 646 PetscFunctionReturn(0); 647 } 648 } 649 650 if (isdraw) { 651 PetscDraw draw; 652 PetscBool isnull; 653 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 654 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 655 } 656 657 { 658 /* assemble the entire matrix onto first processor. */ 659 Mat A; 660 Mat_SeqSBAIJ *Aloc; 661 Mat_SeqBAIJ *Bloc; 662 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 663 MatScalar *a; 664 const char *matname; 665 666 /* Should this be the same type as mat? */ 667 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 668 if (!rank) { 669 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 670 } else { 671 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 672 } 673 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 674 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 675 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 676 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 677 678 /* copy over the A part */ 679 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 680 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 681 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 682 683 for (i=0; i<mbs; i++) { 684 rvals[0] = bs*(baij->rstartbs + i); 685 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 686 for (j=ai[i]; j<ai[i+1]; j++) { 687 col = (baij->cstartbs+aj[j])*bs; 688 for (k=0; k<bs; k++) { 689 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 690 col++; 691 a += bs; 692 } 693 } 694 } 695 /* copy over the B part */ 696 Bloc = (Mat_SeqBAIJ*)baij->B->data; 697 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 698 for (i=0; i<mbs; i++) { 699 700 rvals[0] = bs*(baij->rstartbs + i); 701 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 702 for (j=ai[i]; j<ai[i+1]; j++) { 703 col = baij->garray[aj[j]]*bs; 704 for (k=0; k<bs; k++) { 705 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 706 col++; 707 a += bs; 708 } 709 } 710 } 711 ierr = PetscFree(rvals);CHKERRQ(ierr); 712 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 713 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 714 /* 715 Everyone has to call to draw the matrix since the graphics waits are 716 synchronized across all processors that share the PetscDraw object 717 */ 718 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 719 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 720 if (!rank) { 721 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 722 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 723 } 724 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 725 ierr = MatDestroy(&A);CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatView_MPISBAIJ" 732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 733 { 734 PetscErrorCode ierr; 735 PetscBool iascii,isdraw,issocket,isbinary; 736 737 PetscFunctionBegin; 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 739 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 740 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 741 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 742 if (iascii || isdraw || issocket || isbinary) { 743 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatDestroy_MPISBAIJ" 750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 751 { 752 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 #if defined(PETSC_USE_LOG) 757 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 758 #endif 759 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 760 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 761 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 762 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 763 #if defined(PETSC_USE_CTABLE) 764 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 765 #else 766 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 767 #endif 768 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 770 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 771 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 772 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 773 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 774 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 776 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 777 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 778 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 779 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 780 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 781 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 782 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 783 #if defined(PETSC_USE_REAL_MAT_SINGLE) 784 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 785 #endif 786 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 788 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 789 ierr = PetscFree(mat->data);CHKERRQ(ierr); 790 791 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 796 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 797 #if defined(PETSC_HAVE_ELEMENTAL) 798 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 799 #endif 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 806 { 807 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 808 PetscErrorCode ierr; 809 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 810 PetscScalar *x,*from; 811 812 PetscFunctionBegin; 813 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 814 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 815 816 /* diagonal part */ 817 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 818 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 819 820 /* subdiagonal part */ 821 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 822 823 /* copy x into the vec slvec0 */ 824 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 827 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 828 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 829 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 830 831 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 /* supperdiagonal part */ 834 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMult_MPISBAIJ" 840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 841 { 842 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 843 PetscErrorCode ierr; 844 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 845 PetscScalar *from; 846 const PetscScalar *x; 847 848 PetscFunctionBegin; 849 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 850 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 851 852 /* diagonal part */ 853 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 854 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 855 856 /* subdiagonal part */ 857 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 858 859 /* copy x into the vec slvec0 */ 860 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 861 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 862 863 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 864 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 865 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 866 867 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 /* supperdiagonal part */ 870 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 876 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 877 { 878 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 879 PetscErrorCode ierr; 880 PetscInt nt; 881 882 PetscFunctionBegin; 883 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 884 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 885 886 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 887 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 888 889 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 /* do diagonal part */ 891 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 892 /* do supperdiagonal part */ 893 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 895 /* do subdiagonal part */ 896 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 897 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 898 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 904 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 905 { 906 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 907 PetscErrorCode ierr; 908 PetscInt mbs=a->mbs,bs=A->rmap->bs; 909 PetscScalar *from,zero=0.0; 910 const PetscScalar *x; 911 912 PetscFunctionBegin; 913 /* 914 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 915 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 916 */ 917 /* diagonal part */ 918 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 919 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 920 921 /* subdiagonal part */ 922 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 923 924 /* copy x into the vec slvec0 */ 925 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 926 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 927 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 928 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 929 930 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 932 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 933 934 /* supperdiagonal part */ 935 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 941 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 942 { 943 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 944 PetscErrorCode ierr; 945 946 PetscFunctionBegin; 947 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 /* do diagonal part */ 949 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 950 /* do supperdiagonal part */ 951 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 953 954 /* do subdiagonal part */ 955 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 956 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 957 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 /* 962 This only works correctly for square matrices where the subblock A->A is the 963 diagonal block 964 */ 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 967 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 968 { 969 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 974 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatScale_MPISBAIJ" 980 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 981 { 982 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 987 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatGetRow_MPISBAIJ" 993 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 994 { 995 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 996 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 997 PetscErrorCode ierr; 998 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 999 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1000 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1001 1002 PetscFunctionBegin; 1003 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1004 mat->getrowactive = PETSC_TRUE; 1005 1006 if (!mat->rowvalues && (idx || v)) { 1007 /* 1008 allocate enough space to hold information from the longest row. 1009 */ 1010 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1011 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1012 PetscInt max = 1,mbs = mat->mbs,tmp; 1013 for (i=0; i<mbs; i++) { 1014 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1015 if (max < tmp) max = tmp; 1016 } 1017 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1018 } 1019 1020 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1021 lrow = row - brstart; /* local row index */ 1022 1023 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1024 if (!v) {pvA = 0; pvB = 0;} 1025 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1026 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1027 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1028 nztot = nzA + nzB; 1029 1030 cmap = mat->garray; 1031 if (v || idx) { 1032 if (nztot) { 1033 /* Sort by increasing column numbers, assuming A and B already sorted */ 1034 PetscInt imark = -1; 1035 if (v) { 1036 *v = v_p = mat->rowvalues; 1037 for (i=0; i<nzB; i++) { 1038 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1039 else break; 1040 } 1041 imark = i; 1042 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1043 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1044 } 1045 if (idx) { 1046 *idx = idx_p = mat->rowindices; 1047 if (imark > -1) { 1048 for (i=0; i<imark; i++) { 1049 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1050 } 1051 } else { 1052 for (i=0; i<nzB; i++) { 1053 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1054 else break; 1055 } 1056 imark = i; 1057 } 1058 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1059 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1060 } 1061 } else { 1062 if (idx) *idx = 0; 1063 if (v) *v = 0; 1064 } 1065 } 1066 *nz = nztot; 1067 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1068 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1075 { 1076 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1077 1078 PetscFunctionBegin; 1079 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1080 baij->getrowactive = PETSC_FALSE; 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1086 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1087 { 1088 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1089 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1090 1091 PetscFunctionBegin; 1092 aA->getrow_utriangular = PETSC_TRUE; 1093 PetscFunctionReturn(0); 1094 } 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1097 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1098 { 1099 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1100 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1101 1102 PetscFunctionBegin; 1103 aA->getrow_utriangular = PETSC_FALSE; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1109 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1110 { 1111 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1116 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1122 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1123 { 1124 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1129 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1135 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1136 { 1137 PetscErrorCode ierr; 1138 IS iscol_local; 1139 PetscInt csize; 1140 1141 PetscFunctionBegin; 1142 MPI_Comm comm; 1143 PetscMPIInt rank; 1144 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1145 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1146 1147 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1148 if (call == MAT_REUSE_MATRIX) { 1149 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1150 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1151 } else { 1152 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1153 1154 { /* check if isrow == iscol_local Note: isrow is distributed, but iscol_local is locally owned! */ 1155 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1156 const PetscInt *ptr1,*ptr2; 1157 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1158 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1159 if (sz1 > sz2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1160 1161 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1162 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1163 1164 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1165 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1166 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1167 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1168 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1169 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1170 1171 nmatch=0; 1172 k = 0; 1173 for (i=0; i<sz1; i++){ 1174 for (j=k; j<sz2; j++){ 1175 if (a1[i] == a2[j]) { 1176 k = j; nmatch++; 1177 if (rank == 1) printf("match %d\n",a2[j]); 1178 break; 1179 } 1180 } 1181 } 1182 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1183 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1184 ierr = PetscFree(a1);CHKERRQ(ierr); 1185 ierr = PetscFree(a2);CHKERRQ(ierr); 1186 if (nmatch < sz1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1187 } 1188 } 1189 1190 /* now call MatGetSubMatrix_MPIBAIJ() */ 1191 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); //csize: global iscol size 1192 if (call == MAT_INITIAL_MATRIX) { 1193 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1194 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1201 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1202 { 1203 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1208 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1214 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1215 { 1216 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1217 Mat A = a->A,B = a->B; 1218 PetscErrorCode ierr; 1219 PetscReal isend[5],irecv[5]; 1220 1221 PetscFunctionBegin; 1222 info->block_size = (PetscReal)matin->rmap->bs; 1223 1224 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1225 1226 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1227 isend[3] = info->memory; isend[4] = info->mallocs; 1228 1229 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1230 1231 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1232 isend[3] += info->memory; isend[4] += info->mallocs; 1233 if (flag == MAT_LOCAL) { 1234 info->nz_used = isend[0]; 1235 info->nz_allocated = isend[1]; 1236 info->nz_unneeded = isend[2]; 1237 info->memory = isend[3]; 1238 info->mallocs = isend[4]; 1239 } else if (flag == MAT_GLOBAL_MAX) { 1240 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1241 1242 info->nz_used = irecv[0]; 1243 info->nz_allocated = irecv[1]; 1244 info->nz_unneeded = irecv[2]; 1245 info->memory = irecv[3]; 1246 info->mallocs = irecv[4]; 1247 } else if (flag == MAT_GLOBAL_SUM) { 1248 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1249 1250 info->nz_used = irecv[0]; 1251 info->nz_allocated = irecv[1]; 1252 info->nz_unneeded = irecv[2]; 1253 info->memory = irecv[3]; 1254 info->mallocs = irecv[4]; 1255 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1256 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1257 info->fill_ratio_needed = 0; 1258 info->factor_mallocs = 0; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1264 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1265 { 1266 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1267 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 switch (op) { 1272 case MAT_NEW_NONZERO_LOCATIONS: 1273 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1274 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1275 case MAT_KEEP_NONZERO_PATTERN: 1276 case MAT_NEW_NONZERO_LOCATION_ERR: 1277 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1278 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1279 break; 1280 case MAT_ROW_ORIENTED: 1281 a->roworiented = flg; 1282 1283 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1284 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1285 break; 1286 case MAT_NEW_DIAGONALS: 1287 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1288 break; 1289 case MAT_IGNORE_OFF_PROC_ENTRIES: 1290 a->donotstash = flg; 1291 break; 1292 case MAT_USE_HASH_TABLE: 1293 a->ht_flag = flg; 1294 break; 1295 case MAT_HERMITIAN: 1296 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1297 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1298 1299 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1300 break; 1301 case MAT_SPD: 1302 A->spd_set = PETSC_TRUE; 1303 A->spd = flg; 1304 if (flg) { 1305 A->symmetric = PETSC_TRUE; 1306 A->structurally_symmetric = PETSC_TRUE; 1307 A->symmetric_set = PETSC_TRUE; 1308 A->structurally_symmetric_set = PETSC_TRUE; 1309 } 1310 break; 1311 case MAT_SYMMETRIC: 1312 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1313 break; 1314 case MAT_STRUCTURALLY_SYMMETRIC: 1315 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1316 break; 1317 case MAT_SYMMETRY_ETERNAL: 1318 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1319 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1320 break; 1321 case MAT_IGNORE_LOWER_TRIANGULAR: 1322 aA->ignore_ltriangular = flg; 1323 break; 1324 case MAT_ERROR_LOWER_TRIANGULAR: 1325 aA->ignore_ltriangular = flg; 1326 break; 1327 case MAT_GETROW_UPPERTRIANGULAR: 1328 aA->getrow_utriangular = flg; 1329 break; 1330 default: 1331 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1332 } 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1338 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1339 { 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 if (MAT_INITIAL_MATRIX || *B != A) { 1344 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1351 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1352 { 1353 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1354 Mat a = baij->A, b=baij->B; 1355 PetscErrorCode ierr; 1356 PetscInt nv,m,n; 1357 PetscBool flg; 1358 1359 PetscFunctionBegin; 1360 if (ll != rr) { 1361 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1362 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1363 } 1364 if (!ll) PetscFunctionReturn(0); 1365 1366 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1367 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1368 1369 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1370 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1371 1372 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1373 1374 /* left diagonalscale the off-diagonal part */ 1375 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1376 1377 /* scale the diagonal part */ 1378 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1379 1380 /* right diagonalscale the off-diagonal part */ 1381 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1382 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1388 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1389 { 1390 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1391 PetscErrorCode ierr; 1392 1393 PetscFunctionBegin; 1394 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1399 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatEqual_MPISBAIJ" 1402 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1403 { 1404 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1405 Mat a,b,c,d; 1406 PetscBool flg; 1407 PetscErrorCode ierr; 1408 1409 PetscFunctionBegin; 1410 a = matA->A; b = matA->B; 1411 c = matB->A; d = matB->B; 1412 1413 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1414 if (flg) { 1415 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1416 } 1417 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatCopy_MPISBAIJ" 1423 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1424 { 1425 PetscErrorCode ierr; 1426 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1427 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1428 1429 PetscFunctionBegin; 1430 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1431 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1432 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1433 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1434 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1435 } else { 1436 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1437 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1438 } 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1444 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1445 { 1446 PetscErrorCode ierr; 1447 1448 PetscFunctionBegin; 1449 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #undef __FUNCT__ 1454 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1455 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1456 { 1457 PetscErrorCode ierr; 1458 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1459 PetscBLASInt bnz,one=1; 1460 Mat_SeqSBAIJ *xa,*ya; 1461 Mat_SeqBAIJ *xb,*yb; 1462 1463 PetscFunctionBegin; 1464 if (str == SAME_NONZERO_PATTERN) { 1465 PetscScalar alpha = a; 1466 xa = (Mat_SeqSBAIJ*)xx->A->data; 1467 ya = (Mat_SeqSBAIJ*)yy->A->data; 1468 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1469 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1470 xb = (Mat_SeqBAIJ*)xx->B->data; 1471 yb = (Mat_SeqBAIJ*)yy->B->data; 1472 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1473 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1474 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1475 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1476 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1477 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1478 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1479 } else { 1480 Mat B; 1481 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1482 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1483 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1484 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1485 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1486 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1487 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1488 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1489 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1490 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1491 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1492 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1493 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1494 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1495 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1496 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1497 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1498 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1499 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1500 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1507 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1508 { 1509 PetscErrorCode ierr; 1510 PetscInt i; 1511 PetscBool flg; 1512 1513 PetscFunctionBegin; 1514 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1515 for (i=0; i<n; i++) { 1516 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1517 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1518 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1519 } 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 /* -------------------------------------------------------------------*/ 1525 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1526 MatGetRow_MPISBAIJ, 1527 MatRestoreRow_MPISBAIJ, 1528 MatMult_MPISBAIJ, 1529 /* 4*/ MatMultAdd_MPISBAIJ, 1530 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1531 MatMultAdd_MPISBAIJ, 1532 0, 1533 0, 1534 0, 1535 /* 10*/ 0, 1536 0, 1537 0, 1538 MatSOR_MPISBAIJ, 1539 MatTranspose_MPISBAIJ, 1540 /* 15*/ MatGetInfo_MPISBAIJ, 1541 MatEqual_MPISBAIJ, 1542 MatGetDiagonal_MPISBAIJ, 1543 MatDiagonalScale_MPISBAIJ, 1544 MatNorm_MPISBAIJ, 1545 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1546 MatAssemblyEnd_MPISBAIJ, 1547 MatSetOption_MPISBAIJ, 1548 MatZeroEntries_MPISBAIJ, 1549 /* 24*/ 0, 1550 0, 1551 0, 1552 0, 1553 0, 1554 /* 29*/ MatSetUp_MPISBAIJ, 1555 0, 1556 0, 1557 0, 1558 0, 1559 /* 34*/ MatDuplicate_MPISBAIJ, 1560 0, 1561 0, 1562 0, 1563 0, 1564 /* 39*/ MatAXPY_MPISBAIJ, 1565 MatGetSubMatrices_MPISBAIJ, 1566 MatIncreaseOverlap_MPISBAIJ, 1567 MatGetValues_MPISBAIJ, 1568 MatCopy_MPISBAIJ, 1569 /* 44*/ 0, 1570 MatScale_MPISBAIJ, 1571 0, 1572 0, 1573 0, 1574 /* 49*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /* 54*/ 0, 1580 0, 1581 MatSetUnfactored_MPISBAIJ, 1582 0, 1583 MatSetValuesBlocked_MPISBAIJ, 1584 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1585 0, 1586 0, 1587 0, 1588 0, 1589 /* 64*/ 0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1595 0, 1596 0, 1597 0, 1598 0, 1599 /* 74*/ 0, 1600 0, 1601 0, 1602 0, 1603 0, 1604 /* 79*/ 0, 1605 0, 1606 0, 1607 0, 1608 MatLoad_MPISBAIJ, 1609 /* 84*/ 0, 1610 0, 1611 0, 1612 0, 1613 0, 1614 /* 89*/ 0, 1615 0, 1616 0, 1617 0, 1618 0, 1619 /* 94*/ 0, 1620 0, 1621 0, 1622 0, 1623 0, 1624 /* 99*/ 0, 1625 0, 1626 0, 1627 0, 1628 0, 1629 /*104*/ 0, 1630 MatRealPart_MPISBAIJ, 1631 MatImaginaryPart_MPISBAIJ, 1632 MatGetRowUpperTriangular_MPISBAIJ, 1633 MatRestoreRowUpperTriangular_MPISBAIJ, 1634 /*109*/ 0, 1635 0, 1636 0, 1637 0, 1638 0, 1639 /*114*/ 0, 1640 0, 1641 0, 1642 0, 1643 0, 1644 /*119*/ 0, 1645 0, 1646 0, 1647 0, 1648 0, 1649 /*124*/ 0, 1650 0, 1651 0, 1652 0, 1653 0, 1654 /*129*/ 0, 1655 0, 1656 0, 1657 0, 1658 0, 1659 /*134*/ 0, 1660 0, 1661 0, 1662 0, 1663 0, 1664 /*139*/ 0, 1665 0, 1666 0, 1667 0, 1668 0, 1669 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1670 }; 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1674 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1675 { 1676 PetscFunctionBegin; 1677 *a = ((Mat_MPISBAIJ*)A->data)->A; 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1683 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1684 { 1685 Mat_MPISBAIJ *b; 1686 PetscErrorCode ierr; 1687 PetscInt i,mbs,Mbs; 1688 1689 PetscFunctionBegin; 1690 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1691 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1692 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1693 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1694 1695 b = (Mat_MPISBAIJ*)B->data; 1696 mbs = B->rmap->n/bs; 1697 Mbs = B->rmap->N/bs; 1698 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1699 1700 B->rmap->bs = bs; 1701 b->bs2 = bs*bs; 1702 b->mbs = mbs; 1703 b->Mbs = Mbs; 1704 b->nbs = B->cmap->n/bs; 1705 b->Nbs = B->cmap->N/bs; 1706 1707 for (i=0; i<=b->size; i++) { 1708 b->rangebs[i] = B->rmap->range[i]/bs; 1709 } 1710 b->rstartbs = B->rmap->rstart/bs; 1711 b->rendbs = B->rmap->rend/bs; 1712 1713 b->cstartbs = B->cmap->rstart/bs; 1714 b->cendbs = B->cmap->rend/bs; 1715 1716 if (!B->preallocated) { 1717 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1718 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1719 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1720 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1721 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1722 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1723 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1724 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1725 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1726 } 1727 1728 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1729 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1730 1731 B->preallocated = PETSC_TRUE; 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1737 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1738 { 1739 PetscInt m,rstart,cstart,cend; 1740 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1741 const PetscInt *JJ =0; 1742 PetscScalar *values=0; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1747 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1748 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1749 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1750 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1751 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1752 m = B->rmap->n/bs; 1753 rstart = B->rmap->rstart/bs; 1754 cstart = B->cmap->rstart/bs; 1755 cend = B->cmap->rend/bs; 1756 1757 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1758 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1759 for (i=0; i<m; i++) { 1760 nz = ii[i+1] - ii[i]; 1761 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1762 nz_max = PetscMax(nz_max,nz); 1763 JJ = jj + ii[i]; 1764 for (j=0; j<nz; j++) { 1765 if (*JJ >= cstart) break; 1766 JJ++; 1767 } 1768 d = 0; 1769 for (; j<nz; j++) { 1770 if (*JJ++ >= cend) break; 1771 d++; 1772 } 1773 d_nnz[i] = d; 1774 o_nnz[i] = nz - d; 1775 } 1776 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1777 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1778 1779 values = (PetscScalar*)V; 1780 if (!values) { 1781 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1782 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1783 } 1784 for (i=0; i<m; i++) { 1785 PetscInt row = i + rstart; 1786 PetscInt ncols = ii[i+1] - ii[i]; 1787 const PetscInt *icols = jj + ii[i]; 1788 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1789 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1790 } 1791 1792 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1793 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 /*MC 1800 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1801 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1802 the matrix is stored. 1803 1804 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1805 can call MatSetOption(Mat, MAT_HERMITIAN); 1806 1807 Options Database Keys: 1808 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1809 1810 Level: beginner 1811 1812 .seealso: MatCreateMPISBAIJ 1813 M*/ 1814 1815 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatCreate_MPISBAIJ" 1819 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1820 { 1821 Mat_MPISBAIJ *b; 1822 PetscErrorCode ierr; 1823 PetscBool flg = PETSC_FALSE; 1824 1825 PetscFunctionBegin; 1826 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1827 B->data = (void*)b; 1828 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1829 1830 B->ops->destroy = MatDestroy_MPISBAIJ; 1831 B->ops->view = MatView_MPISBAIJ; 1832 B->assembled = PETSC_FALSE; 1833 B->insertmode = NOT_SET_VALUES; 1834 1835 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1836 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1837 1838 /* build local table of row and column ownerships */ 1839 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1840 1841 /* build cache for off array entries formed */ 1842 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1843 1844 b->donotstash = PETSC_FALSE; 1845 b->colmap = NULL; 1846 b->garray = NULL; 1847 b->roworiented = PETSC_TRUE; 1848 1849 /* stuff used in block assembly */ 1850 b->barray = 0; 1851 1852 /* stuff used for matrix vector multiply */ 1853 b->lvec = 0; 1854 b->Mvctx = 0; 1855 b->slvec0 = 0; 1856 b->slvec0b = 0; 1857 b->slvec1 = 0; 1858 b->slvec1a = 0; 1859 b->slvec1b = 0; 1860 b->sMvctx = 0; 1861 1862 /* stuff for MatGetRow() */ 1863 b->rowindices = 0; 1864 b->rowvalues = 0; 1865 b->getrowactive = PETSC_FALSE; 1866 1867 /* hash table stuff */ 1868 b->ht = 0; 1869 b->hd = 0; 1870 b->ht_size = 0; 1871 b->ht_flag = PETSC_FALSE; 1872 b->ht_fact = 0; 1873 b->ht_total_ct = 0; 1874 b->ht_insert_ct = 0; 1875 1876 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1877 b->ijonly = PETSC_FALSE; 1878 1879 b->in_loc = 0; 1880 b->v_loc = 0; 1881 b->n_loc = 0; 1882 1883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1884 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1885 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1886 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1887 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1888 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1889 #if defined(PETSC_HAVE_ELEMENTAL) 1890 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1891 #endif 1892 1893 B->symmetric = PETSC_TRUE; 1894 B->structurally_symmetric = PETSC_TRUE; 1895 B->symmetric_set = PETSC_TRUE; 1896 B->structurally_symmetric_set = PETSC_TRUE; 1897 1898 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1899 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1900 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 1901 if (flg) { 1902 PetscReal fact = 1.39; 1903 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1904 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1905 if (fact <= 1.0) fact = 1.39; 1906 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1907 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1908 } 1909 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 /*MC 1914 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1915 1916 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1917 and MATMPISBAIJ otherwise. 1918 1919 Options Database Keys: 1920 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1921 1922 Level: beginner 1923 1924 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1925 M*/ 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1929 /*@C 1930 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1931 the user should preallocate the matrix storage by setting the parameters 1932 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1933 performance can be increased by more than a factor of 50. 1934 1935 Collective on Mat 1936 1937 Input Parameters: 1938 + B - the matrix 1939 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1940 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1941 . d_nz - number of block nonzeros per block row in diagonal portion of local 1942 submatrix (same for all local rows) 1943 . d_nnz - array containing the number of block nonzeros in the various block rows 1944 in the upper triangular and diagonal part of the in diagonal portion of the local 1945 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1946 for the diagonal entry and set a value even if it is zero. 1947 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1948 submatrix (same for all local rows). 1949 - o_nnz - array containing the number of nonzeros in the various block rows of the 1950 off-diagonal portion of the local submatrix that is right of the diagonal 1951 (possibly different for each block row) or NULL. 1952 1953 1954 Options Database Keys: 1955 . -mat_no_unroll - uses code that does not unroll the loops in the 1956 block calculations (much slower) 1957 . -mat_block_size - size of the blocks to use 1958 1959 Notes: 1960 1961 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1962 than it must be used on all processors that share the object for that argument. 1963 1964 If the *_nnz parameter is given then the *_nz parameter is ignored 1965 1966 Storage Information: 1967 For a square global matrix we define each processor's diagonal portion 1968 to be its local rows and the corresponding columns (a square submatrix); 1969 each processor's off-diagonal portion encompasses the remainder of the 1970 local matrix (a rectangular submatrix). 1971 1972 The user can specify preallocated storage for the diagonal part of 1973 the local submatrix with either d_nz or d_nnz (not both). Set 1974 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1975 memory allocation. Likewise, specify preallocated storage for the 1976 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1977 1978 You can call MatGetInfo() to get information on how effective the preallocation was; 1979 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1980 You can also run with the option -info and look for messages with the string 1981 malloc in them to see if additional memory allocation was needed. 1982 1983 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1984 the figure below we depict these three local rows and all columns (0-11). 1985 1986 .vb 1987 0 1 2 3 4 5 6 7 8 9 10 11 1988 -------------------------- 1989 row 3 |. . . d d d o o o o o o 1990 row 4 |. . . d d d o o o o o o 1991 row 5 |. . . d d d o o o o o o 1992 -------------------------- 1993 .ve 1994 1995 Thus, any entries in the d locations are stored in the d (diagonal) 1996 submatrix, and any entries in the o locations are stored in the 1997 o (off-diagonal) submatrix. Note that the d matrix is stored in 1998 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1999 2000 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2001 plus the diagonal part of the d matrix, 2002 and o_nz should indicate the number of block nonzeros per row in the o matrix 2003 2004 In general, for PDE problems in which most nonzeros are near the diagonal, 2005 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2006 or you will get TERRIBLE performance; see the users' manual chapter on 2007 matrices. 2008 2009 Level: intermediate 2010 2011 .keywords: matrix, block, aij, compressed row, sparse, parallel 2012 2013 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2014 @*/ 2015 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2016 { 2017 PetscErrorCode ierr; 2018 2019 PetscFunctionBegin; 2020 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2021 PetscValidType(B,1); 2022 PetscValidLogicalCollectiveInt(B,bs,2); 2023 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "MatCreateSBAIJ" 2029 /*@C 2030 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2031 (block compressed row). For good matrix assembly performance 2032 the user should preallocate the matrix storage by setting the parameters 2033 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2034 performance can be increased by more than a factor of 50. 2035 2036 Collective on MPI_Comm 2037 2038 Input Parameters: 2039 + comm - MPI communicator 2040 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2041 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2042 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2043 This value should be the same as the local size used in creating the 2044 y vector for the matrix-vector product y = Ax. 2045 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2046 This value should be the same as the local size used in creating the 2047 x vector for the matrix-vector product y = Ax. 2048 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2049 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2050 . d_nz - number of block nonzeros per block row in diagonal portion of local 2051 submatrix (same for all local rows) 2052 . d_nnz - array containing the number of block nonzeros in the various block rows 2053 in the upper triangular portion of the in diagonal portion of the local 2054 (possibly different for each block block row) or NULL. 2055 If you plan to factor the matrix you must leave room for the diagonal entry and 2056 set its value even if it is zero. 2057 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2058 submatrix (same for all local rows). 2059 - o_nnz - array containing the number of nonzeros in the various block rows of the 2060 off-diagonal portion of the local submatrix (possibly different for 2061 each block row) or NULL. 2062 2063 Output Parameter: 2064 . A - the matrix 2065 2066 Options Database Keys: 2067 . -mat_no_unroll - uses code that does not unroll the loops in the 2068 block calculations (much slower) 2069 . -mat_block_size - size of the blocks to use 2070 . -mat_mpi - use the parallel matrix data structures even on one processor 2071 (defaults to using SeqBAIJ format on one processor) 2072 2073 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2074 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2075 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2076 2077 Notes: 2078 The number of rows and columns must be divisible by blocksize. 2079 This matrix type does not support complex Hermitian operation. 2080 2081 The user MUST specify either the local or global matrix dimensions 2082 (possibly both). 2083 2084 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2085 than it must be used on all processors that share the object for that argument. 2086 2087 If the *_nnz parameter is given then the *_nz parameter is ignored 2088 2089 Storage Information: 2090 For a square global matrix we define each processor's diagonal portion 2091 to be its local rows and the corresponding columns (a square submatrix); 2092 each processor's off-diagonal portion encompasses the remainder of the 2093 local matrix (a rectangular submatrix). 2094 2095 The user can specify preallocated storage for the diagonal part of 2096 the local submatrix with either d_nz or d_nnz (not both). Set 2097 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2098 memory allocation. Likewise, specify preallocated storage for the 2099 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2100 2101 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2102 the figure below we depict these three local rows and all columns (0-11). 2103 2104 .vb 2105 0 1 2 3 4 5 6 7 8 9 10 11 2106 -------------------------- 2107 row 3 |. . . d d d o o o o o o 2108 row 4 |. . . d d d o o o o o o 2109 row 5 |. . . d d d o o o o o o 2110 -------------------------- 2111 .ve 2112 2113 Thus, any entries in the d locations are stored in the d (diagonal) 2114 submatrix, and any entries in the o locations are stored in the 2115 o (off-diagonal) submatrix. Note that the d matrix is stored in 2116 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2117 2118 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2119 plus the diagonal part of the d matrix, 2120 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2121 In general, for PDE problems in which most nonzeros are near the diagonal, 2122 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2123 or you will get TERRIBLE performance; see the users' manual chapter on 2124 matrices. 2125 2126 Level: intermediate 2127 2128 .keywords: matrix, block, aij, compressed row, sparse, parallel 2129 2130 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2131 @*/ 2132 2133 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2134 { 2135 PetscErrorCode ierr; 2136 PetscMPIInt size; 2137 2138 PetscFunctionBegin; 2139 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2140 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2141 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2142 if (size > 1) { 2143 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2144 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2145 } else { 2146 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2147 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2148 } 2149 PetscFunctionReturn(0); 2150 } 2151 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2155 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2156 { 2157 Mat mat; 2158 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2159 PetscErrorCode ierr; 2160 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2161 PetscScalar *array; 2162 2163 PetscFunctionBegin; 2164 *newmat = 0; 2165 2166 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2167 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2168 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2169 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2170 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2171 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2172 2173 mat->factortype = matin->factortype; 2174 mat->preallocated = PETSC_TRUE; 2175 mat->assembled = PETSC_TRUE; 2176 mat->insertmode = NOT_SET_VALUES; 2177 2178 a = (Mat_MPISBAIJ*)mat->data; 2179 a->bs2 = oldmat->bs2; 2180 a->mbs = oldmat->mbs; 2181 a->nbs = oldmat->nbs; 2182 a->Mbs = oldmat->Mbs; 2183 a->Nbs = oldmat->Nbs; 2184 2185 2186 a->size = oldmat->size; 2187 a->rank = oldmat->rank; 2188 a->donotstash = oldmat->donotstash; 2189 a->roworiented = oldmat->roworiented; 2190 a->rowindices = 0; 2191 a->rowvalues = 0; 2192 a->getrowactive = PETSC_FALSE; 2193 a->barray = 0; 2194 a->rstartbs = oldmat->rstartbs; 2195 a->rendbs = oldmat->rendbs; 2196 a->cstartbs = oldmat->cstartbs; 2197 a->cendbs = oldmat->cendbs; 2198 2199 /* hash table stuff */ 2200 a->ht = 0; 2201 a->hd = 0; 2202 a->ht_size = 0; 2203 a->ht_flag = oldmat->ht_flag; 2204 a->ht_fact = oldmat->ht_fact; 2205 a->ht_total_ct = 0; 2206 a->ht_insert_ct = 0; 2207 2208 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2209 if (oldmat->colmap) { 2210 #if defined(PETSC_USE_CTABLE) 2211 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2212 #else 2213 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2214 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2215 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2216 #endif 2217 } else a->colmap = 0; 2218 2219 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2220 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2221 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2222 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2223 } else a->garray = 0; 2224 2225 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2226 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2227 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2228 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2229 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2230 2231 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2232 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2233 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2234 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2235 2236 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2237 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2238 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2239 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2240 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2241 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2242 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2243 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2244 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2245 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2246 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2247 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2248 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2249 2250 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2251 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2252 a->sMvctx = oldmat->sMvctx; 2253 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2254 2255 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2256 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2257 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2258 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2259 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2260 *newmat = mat; 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatLoad_MPISBAIJ" 2266 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2267 { 2268 PetscErrorCode ierr; 2269 PetscInt i,nz,j,rstart,rend; 2270 PetscScalar *vals,*buf; 2271 MPI_Comm comm; 2272 MPI_Status status; 2273 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2274 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2275 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2276 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2277 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2278 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2279 int fd; 2280 2281 PetscFunctionBegin; 2282 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2283 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2284 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2285 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2286 if (bs < 0) bs = 1; 2287 2288 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2289 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2290 if (!rank) { 2291 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2292 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2293 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2294 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2295 } 2296 2297 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2298 2299 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2300 M = header[1]; 2301 N = header[2]; 2302 2303 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2304 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2305 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2306 2307 /* If global sizes are set, check if they are consistent with that given in the file */ 2308 if (sizesset) { 2309 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2310 } 2311 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2312 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2313 2314 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2315 2316 /* 2317 This code adds extra rows to make sure the number of rows is 2318 divisible by the blocksize 2319 */ 2320 Mbs = M/bs; 2321 extra_rows = bs - M + bs*(Mbs); 2322 if (extra_rows == bs) extra_rows = 0; 2323 else Mbs++; 2324 if (extra_rows &&!rank) { 2325 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2326 } 2327 2328 /* determine ownership of all rows */ 2329 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2330 mbs = Mbs/size + ((Mbs % size) > rank); 2331 m = mbs*bs; 2332 } else { /* User Set */ 2333 m = newmat->rmap->n; 2334 mbs = m/bs; 2335 } 2336 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2337 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2338 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2339 rowners[0] = 0; 2340 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2341 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2342 rstart = rowners[rank]; 2343 rend = rowners[rank+1]; 2344 2345 /* distribute row lengths to all processors */ 2346 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2347 if (!rank) { 2348 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2349 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2350 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2351 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2352 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2353 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2354 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2355 } else { 2356 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2357 } 2358 2359 if (!rank) { /* procs[0] */ 2360 /* calculate the number of nonzeros on each processor */ 2361 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2362 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2363 for (i=0; i<size; i++) { 2364 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2365 procsnz[i] += rowlengths[j]; 2366 } 2367 } 2368 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2369 2370 /* determine max buffer needed and allocate it */ 2371 maxnz = 0; 2372 for (i=0; i<size; i++) { 2373 maxnz = PetscMax(maxnz,procsnz[i]); 2374 } 2375 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2376 2377 /* read in my part of the matrix column indices */ 2378 nz = procsnz[0]; 2379 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2380 mycols = ibuf; 2381 if (size == 1) nz -= extra_rows; 2382 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2383 if (size == 1) { 2384 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2385 } 2386 2387 /* read in every ones (except the last) and ship off */ 2388 for (i=1; i<size-1; i++) { 2389 nz = procsnz[i]; 2390 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2391 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2392 } 2393 /* read in the stuff for the last proc */ 2394 if (size != 1) { 2395 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2396 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2397 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2398 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree(cols);CHKERRQ(ierr); 2401 } else { /* procs[i], i>0 */ 2402 /* determine buffer space needed for message */ 2403 nz = 0; 2404 for (i=0; i<m; i++) nz += locrowlens[i]; 2405 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2406 mycols = ibuf; 2407 /* receive message of column indices*/ 2408 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2409 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2410 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2411 } 2412 2413 /* loop over local rows, determining number of off diagonal entries */ 2414 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2415 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2416 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2417 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2418 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2419 rowcount = 0; 2420 nzcount = 0; 2421 for (i=0; i<mbs; i++) { 2422 dcount = 0; 2423 odcount = 0; 2424 for (j=0; j<bs; j++) { 2425 kmax = locrowlens[rowcount]; 2426 for (k=0; k<kmax; k++) { 2427 tmp = mycols[nzcount++]/bs; /* block col. index */ 2428 if (!mask[tmp]) { 2429 mask[tmp] = 1; 2430 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2431 else masked1[dcount++] = tmp; /* entry in diag portion */ 2432 } 2433 } 2434 rowcount++; 2435 } 2436 2437 dlens[i] = dcount; /* d_nzz[i] */ 2438 odlens[i] = odcount; /* o_nzz[i] */ 2439 2440 /* zero out the mask elements we set */ 2441 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2442 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2443 } 2444 if (!sizesset) { 2445 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2446 } 2447 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2448 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2449 2450 if (!rank) { 2451 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2452 /* read in my part of the matrix numerical values */ 2453 nz = procsnz[0]; 2454 vals = buf; 2455 mycols = ibuf; 2456 if (size == 1) nz -= extra_rows; 2457 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2458 if (size == 1) { 2459 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2460 } 2461 2462 /* insert into matrix */ 2463 jj = rstart*bs; 2464 for (i=0; i<m; i++) { 2465 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2466 mycols += locrowlens[i]; 2467 vals += locrowlens[i]; 2468 jj++; 2469 } 2470 2471 /* read in other processors (except the last one) and ship out */ 2472 for (i=1; i<size-1; i++) { 2473 nz = procsnz[i]; 2474 vals = buf; 2475 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2476 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2477 } 2478 /* the last proc */ 2479 if (size != 1) { 2480 nz = procsnz[i] - extra_rows; 2481 vals = buf; 2482 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2483 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2484 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2485 } 2486 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2487 2488 } else { 2489 /* receive numeric values */ 2490 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2491 2492 /* receive message of values*/ 2493 vals = buf; 2494 mycols = ibuf; 2495 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2496 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2497 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2498 2499 /* insert into matrix */ 2500 jj = rstart*bs; 2501 for (i=0; i<m; i++) { 2502 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2503 mycols += locrowlens[i]; 2504 vals += locrowlens[i]; 2505 jj++; 2506 } 2507 } 2508 2509 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2510 ierr = PetscFree(buf);CHKERRQ(ierr); 2511 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2512 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2513 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2514 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2515 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2516 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2522 /*XXXXX@ 2523 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2524 2525 Input Parameters: 2526 . mat - the matrix 2527 . fact - factor 2528 2529 Not Collective on Mat, each process can have a different hash factor 2530 2531 Level: advanced 2532 2533 Notes: 2534 This can also be set by the command line option: -mat_use_hash_table fact 2535 2536 .keywords: matrix, hashtable, factor, HT 2537 2538 .seealso: MatSetOption() 2539 @XXXXX*/ 2540 2541 2542 #undef __FUNCT__ 2543 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2544 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2545 { 2546 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2547 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2548 PetscReal atmp; 2549 PetscReal *work,*svalues,*rvalues; 2550 PetscErrorCode ierr; 2551 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2552 PetscMPIInt rank,size; 2553 PetscInt *rowners_bs,dest,count,source; 2554 PetscScalar *va; 2555 MatScalar *ba; 2556 MPI_Status stat; 2557 2558 PetscFunctionBegin; 2559 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2560 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2561 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2562 2563 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2564 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2565 2566 bs = A->rmap->bs; 2567 mbs = a->mbs; 2568 Mbs = a->Mbs; 2569 ba = b->a; 2570 bi = b->i; 2571 bj = b->j; 2572 2573 /* find ownerships */ 2574 rowners_bs = A->rmap->range; 2575 2576 /* each proc creates an array to be distributed */ 2577 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2578 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2579 2580 /* row_max for B */ 2581 if (rank != size-1) { 2582 for (i=0; i<mbs; i++) { 2583 ncols = bi[1] - bi[0]; bi++; 2584 brow = bs*i; 2585 for (j=0; j<ncols; j++) { 2586 bcol = bs*(*bj); 2587 for (kcol=0; kcol<bs; kcol++) { 2588 col = bcol + kcol; /* local col index */ 2589 col += rowners_bs[rank+1]; /* global col index */ 2590 for (krow=0; krow<bs; krow++) { 2591 atmp = PetscAbsScalar(*ba); ba++; 2592 row = brow + krow; /* local row index */ 2593 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2594 if (work[col] < atmp) work[col] = atmp; 2595 } 2596 } 2597 bj++; 2598 } 2599 } 2600 2601 /* send values to its owners */ 2602 for (dest=rank+1; dest<size; dest++) { 2603 svalues = work + rowners_bs[dest]; 2604 count = rowners_bs[dest+1]-rowners_bs[dest]; 2605 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2606 } 2607 } 2608 2609 /* receive values */ 2610 if (rank) { 2611 rvalues = work; 2612 count = rowners_bs[rank+1]-rowners_bs[rank]; 2613 for (source=0; source<rank; source++) { 2614 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2615 /* process values */ 2616 for (i=0; i<count; i++) { 2617 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2618 } 2619 } 2620 } 2621 2622 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2623 ierr = PetscFree(work);CHKERRQ(ierr); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 #undef __FUNCT__ 2628 #define __FUNCT__ "MatSOR_MPISBAIJ" 2629 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2630 { 2631 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2632 PetscErrorCode ierr; 2633 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2634 PetscScalar *x,*ptr,*from; 2635 Vec bb1; 2636 const PetscScalar *b; 2637 2638 PetscFunctionBegin; 2639 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2640 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2641 2642 if (flag == SOR_APPLY_UPPER) { 2643 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2648 if (flag & SOR_ZERO_INITIAL_GUESS) { 2649 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2650 its--; 2651 } 2652 2653 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2654 while (its--) { 2655 2656 /* lower triangular part: slvec0b = - B^T*xx */ 2657 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2658 2659 /* copy xx into slvec0a */ 2660 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2661 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2662 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2663 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2664 2665 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2666 2667 /* copy bb into slvec1a */ 2668 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2669 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2670 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2671 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2672 2673 /* set slvec1b = 0 */ 2674 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2675 2676 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2677 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2678 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2679 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2680 2681 /* upper triangular part: bb1 = bb1 - B*x */ 2682 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2683 2684 /* local diagonal sweep */ 2685 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2686 } 2687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2688 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2689 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2690 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2691 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2692 } else if (flag & SOR_EISENSTAT) { 2693 Vec xx1; 2694 PetscBool hasop; 2695 const PetscScalar *diag; 2696 PetscScalar *sl,scale = (omega - 2.0)/omega; 2697 PetscInt i,n; 2698 2699 if (!mat->xx1) { 2700 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2701 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2702 } 2703 xx1 = mat->xx1; 2704 bb1 = mat->bb1; 2705 2706 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2707 2708 if (!mat->diag) { 2709 /* this is wrong for same matrix with new nonzero values */ 2710 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2711 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2712 } 2713 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2714 2715 if (hasop) { 2716 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2717 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2718 } else { 2719 /* 2720 These two lines are replaced by code that may be a bit faster for a good compiler 2721 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2722 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2723 */ 2724 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2725 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2726 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2727 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2728 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2729 if (omega == 1.0) { 2730 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2731 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2732 } else { 2733 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2734 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2735 } 2736 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2737 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2738 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2739 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2740 } 2741 2742 /* multiply off-diagonal portion of matrix */ 2743 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2744 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2745 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2746 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2747 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2748 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2750 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2751 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2752 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2753 2754 /* local sweep */ 2755 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2756 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2757 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 #undef __FUNCT__ 2762 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2763 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2764 { 2765 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2766 PetscErrorCode ierr; 2767 Vec lvec1,bb1; 2768 2769 PetscFunctionBegin; 2770 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2771 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2772 2773 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2774 if (flag & SOR_ZERO_INITIAL_GUESS) { 2775 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2776 its--; 2777 } 2778 2779 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2780 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2781 while (its--) { 2782 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2783 2784 /* lower diagonal part: bb1 = bb - B^T*xx */ 2785 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2786 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2787 2788 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2789 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2790 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2791 2792 /* upper diagonal part: bb1 = bb1 - B*x */ 2793 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2794 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2795 2796 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2797 2798 /* diagonal sweep */ 2799 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2800 } 2801 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2802 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2803 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2804 PetscFunctionReturn(0); 2805 } 2806 2807 #undef __FUNCT__ 2808 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2809 /*@ 2810 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2811 CSR format the local rows. 2812 2813 Collective on MPI_Comm 2814 2815 Input Parameters: 2816 + comm - MPI communicator 2817 . bs - the block size, only a block size of 1 is supported 2818 . m - number of local rows (Cannot be PETSC_DECIDE) 2819 . n - This value should be the same as the local size used in creating the 2820 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2821 calculated if N is given) For square matrices n is almost always m. 2822 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2823 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2824 . i - row indices 2825 . j - column indices 2826 - a - matrix values 2827 2828 Output Parameter: 2829 . mat - the matrix 2830 2831 Level: intermediate 2832 2833 Notes: 2834 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2835 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2836 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2837 2838 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2839 2840 .keywords: matrix, aij, compressed row, sparse, parallel 2841 2842 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2843 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2844 @*/ 2845 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2846 { 2847 PetscErrorCode ierr; 2848 2849 2850 PetscFunctionBegin; 2851 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2852 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2853 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2854 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2855 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2856 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2857 PetscFunctionReturn(0); 2858 } 2859 2860 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2863 /*@C 2864 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2865 (the default parallel PETSc format). 2866 2867 Collective on MPI_Comm 2868 2869 Input Parameters: 2870 + B - the matrix 2871 . bs - the block size 2872 . i - the indices into j for the start of each local row (starts with zero) 2873 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2874 - v - optional values in the matrix 2875 2876 Level: developer 2877 2878 .keywords: matrix, aij, compressed row, sparse, parallel 2879 2880 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2881 @*/ 2882 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2883 { 2884 PetscErrorCode ierr; 2885 2886 PetscFunctionBegin; 2887 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2888 PetscFunctionReturn(0); 2889 } 2890 2891 #undef __FUNCT__ 2892 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 2893 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2894 { 2895 PetscErrorCode ierr; 2896 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2897 PetscInt *indx; 2898 PetscScalar *values; 2899 2900 PetscFunctionBegin; 2901 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2902 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2903 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2904 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 2905 PetscInt *bindx,rmax=a->rmax,j; 2906 2907 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2908 mbs = m/bs; Nbs = N/cbs; 2909 if (n == PETSC_DECIDE) { 2910 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 2911 } 2912 /* Check sum(n) = Nbs */ 2913 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2914 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 2915 2916 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2917 rstart -= mbs; 2918 2919 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2920 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 2921 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2922 for (i=0; i<mbs; i++) { 2923 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2924 nnz = nnz/bs; 2925 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2926 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2927 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2928 } 2929 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2930 ierr = PetscFree(bindx);CHKERRQ(ierr); 2931 2932 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2933 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2934 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2935 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2936 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2937 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2938 } 2939 2940 /* numeric phase */ 2941 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2942 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2943 2944 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2945 for (i=0; i<m; i++) { 2946 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2947 Ii = i + rstart; 2948 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2949 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2950 } 2951 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2952 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956