#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
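/* The two macros above share the insertion kernel of the Seq(S)BAIJ
   MatSetValues routines: a short binary search plus linear scan locates the
   block column within the (sorted) block row, an existing block is updated in
   place, and a new block is created (unless nonew forbids it) by shifting the
   later blocks of the row up by one bs2-sized slot. */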
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
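/* Illustrative sketch (not part of the original source): how a caller would
   exercise the routine above through the public API. With bs = 2, the point
   entry (0,2) lands in an upper-triangular block and is stored, while (2,0)
   is lower-triangular and is silently dropped once
   MAT_IGNORE_LOWER_TRIANGULAR is set. Sizes and values are arbitrary; the
   block is disabled with #if 0 so this file compiles unchanged. */
#if 0
static PetscErrorCode ExampleSetValuesSBAIJ(MPI_Comm comm)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i = 0,j = 2;   /* point indices: block row 0 <= block col 1 */
  PetscScalar    v = 1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,2,2,NULL,1,NULL);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); /* kept */
  ierr = MatSetValues(A,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); /* ignored */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif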
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool       roworiented  = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
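/* Note on the block copy in the routine above: v is an m x n array of bs x bs
   blocks. For row-oriented input the (i,j) block starts at
   v + i*(stepval+bs)*bs + j*bs with stepval = (n-1)*bs, so consecutive rows
   of one block lie stepval+bs scalars apart; the double loop gathers the
   block contiguously into barray for the Seq kernels. When there is a single
   block column (row oriented) or a single block row (column oriented), v is
   already contiguous and is used directly without copying. */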
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
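/* MatGetValues on this format returns only locally owned entries:
   diagonal-block entries come from A, off-diagonal entries are translated
   through colmap/garray, and a requested entry that is absent from the local
   off-diagonal part reads back as 0.0. */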
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
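/* The MPI_Allreduce with MPI_BOR in MatAssemblyBegin_MPISBAIJ() above
   exploits the InsertMode encoding: if some ranks used ADD_VALUES while
   others used INSERT_VALUES, the bitwise OR equals ADD_VALUES|INSERT_VALUES
   and the mismatch is detected on every rank. */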
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed
       column-oriented, set the roworiented flag to column-oriented, and
       after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
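/* How the single-scatter product y = (D + U + U^T)x above works: each rank
   forms slvec1a = A_loc*x (the SeqSBAIJ kernel applies the stored upper
   triangle symmetrically) and slvec0b = B^T*x, the contribution of its
   off-diagonal block to rows owned elsewhere. One forward scatter-add of
   slvec0 = [x ; B^T*x] into slvec1 = [A_loc*x ; 0] then simultaneously
   delivers the B^T*x pieces to their owning ranks and gathers the ghost
   values of x, after which yy = B*slvec1b + slvec1a finishes the product.
   The _2comm variants do the same with two scatters on Mvctx instead. */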
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
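/* MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular() toggle a flag on
   the sequential SBAIJ diagonal block; on this format callers are expected to
   bracket MatGetRow() with them, and the rows returned contain only the
   stored upper triangular part. */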
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel)
          iscol_local - locally owned (seq)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
= MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1509 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1510 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1511 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1512 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1513 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1514 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1515 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1522 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1523 { 1524 PetscErrorCode ierr; 1525 PetscInt i; 1526 PetscBool flg; 1527 1528 PetscFunctionBegin; 1529 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1530 for (i=0; i<n; i++) { 1531 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1532 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1533 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1534 } 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 /* -------------------------------------------------------------------*/ 1540 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1541 MatGetRow_MPISBAIJ, 1542 MatRestoreRow_MPISBAIJ, 1543 MatMult_MPISBAIJ, 1544 /* 4*/ MatMultAdd_MPISBAIJ, 1545 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1546 MatMultAdd_MPISBAIJ, 1547 0, 1548 0, 1549 0, 1550 /* 10*/ 0, 1551 0, 1552 0, 1553 MatSOR_MPISBAIJ, 1554 MatTranspose_MPISBAIJ, 1555 /* 15*/ MatGetInfo_MPISBAIJ, 1556 MatEqual_MPISBAIJ, 1557 MatGetDiagonal_MPISBAIJ, 1558 MatDiagonalScale_MPISBAIJ, 1559 MatNorm_MPISBAIJ, 1560 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1561 MatAssemblyEnd_MPISBAIJ, 1562 MatSetOption_MPISBAIJ, 1563 MatZeroEntries_MPISBAIJ, 1564 /* 24*/ 0, 1565 0, 1566 0, 1567 0, 1568 0, 1569 /* 29*/ MatSetUp_MPISBAIJ, 1570 0, 1571 0, 1572 0, 1573 0, 1574 /* 34*/ MatDuplicate_MPISBAIJ, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /* 39*/ MatAXPY_MPISBAIJ, 1580 MatGetSubMatrices_MPISBAIJ, 1581 MatIncreaseOverlap_MPISBAIJ, 1582 MatGetValues_MPISBAIJ, 1583 MatCopy_MPISBAIJ, 1584 /* 44*/ 0, 1585 MatScale_MPISBAIJ, 1586 0, 1587 0, 1588 0, 1589 /* 49*/ 0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /* 54*/ 0, 1595 0, 1596 MatSetUnfactored_MPISBAIJ, 1597 0, 1598 MatSetValuesBlocked_MPISBAIJ, 1599 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1600 0, 1601 0, 1602 0, 1603 0, 1604 /* 64*/ 0, 1605 0, 1606 0, 1607 0, 1608 0, 1609 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1610 0, 1611 0, 1612 0, 1613 0, 1614 /* 74*/ 0, 1615 0, 1616 0, 1617 0, 1618 0, 1619 /* 79*/ 0, 1620 0, 1621 0, 1622 0, 1623 MatLoad_MPISBAIJ, 1624 /* 84*/ 0, 1625 0, 1626 0, 1627 0, 1628 0, 1629 /* 89*/ 0, 1630 0, 1631 0, 1632 0, 1633 0, 1634 /* 94*/ 0, 1635 0, 1636 0, 1637 0, 1638 0, 1639 /* 99*/ 0, 1640 0, 1641 0, 1642 0, 1643 0, 1644 /*104*/ 0, 1645 MatRealPart_MPISBAIJ, 1646 MatImaginaryPart_MPISBAIJ, 1647 MatGetRowUpperTriangular_MPISBAIJ, 1648 MatRestoreRowUpperTriangular_MPISBAIJ, 1649 /*109*/ 0, 1650 0, 1651 0, 1652 0, 1653 0, 1654 /*114*/ 0, 1655 0, 1656 0, 1657 0, 1658 0, 1659 /*119*/ 0, 1660 0, 1661 0, 1662 0, 1663 0, 1664 /*124*/ 0, 1665 0, 1666 0, 1667 0, 1668 0, 1669 /*129*/ 0, 1670 0, 1671 0, 1672 0, 1673 0, 1674 /*134*/ 0, 1675 0, 1676 0, 1677 0, 1678 0, 1679 /*139*/ 0, 1680 0, 1681 0, 1682 0, 1683 0, 1684 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1685 }; 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ 
"MatGetDiagonalBlock_MPISBAIJ" 1689 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1690 { 1691 PetscFunctionBegin; 1692 *a = ((Mat_MPISBAIJ*)A->data)->A; 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1698 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1699 { 1700 Mat_MPISBAIJ *b; 1701 PetscErrorCode ierr; 1702 PetscInt i,mbs,Mbs; 1703 1704 PetscFunctionBegin; 1705 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1706 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1707 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1708 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1709 1710 b = (Mat_MPISBAIJ*)B->data; 1711 mbs = B->rmap->n/bs; 1712 Mbs = B->rmap->N/bs; 1713 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1714 1715 B->rmap->bs = bs; 1716 b->bs2 = bs*bs; 1717 b->mbs = mbs; 1718 b->Mbs = Mbs; 1719 b->nbs = B->cmap->n/bs; 1720 b->Nbs = B->cmap->N/bs; 1721 1722 for (i=0; i<=b->size; i++) { 1723 b->rangebs[i] = B->rmap->range[i]/bs; 1724 } 1725 b->rstartbs = B->rmap->rstart/bs; 1726 b->rendbs = B->rmap->rend/bs; 1727 1728 b->cstartbs = B->cmap->rstart/bs; 1729 b->cendbs = B->cmap->rend/bs; 1730 1731 if (!B->preallocated) { 1732 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1733 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1734 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1735 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1736 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1737 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1738 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1739 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1740 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1741 } 1742 1743 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1744 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1745 1746 B->preallocated = PETSC_TRUE; 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1752 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1753 { 1754 PetscInt m,rstart,cstart,cend; 1755 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1756 const PetscInt *JJ =0; 1757 PetscScalar *values=0; 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1762 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1763 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1764 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1765 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1766 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1767 m = B->rmap->n/bs; 1768 rstart = B->rmap->rstart/bs; 1769 cstart = B->cmap->rstart/bs; 1770 cend = B->cmap->rend/bs; 1771 1772 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1773 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1774 for (i=0; i<m; i++) { 1775 nz = 
ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
   you can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE).

   Options Database Keys:
.
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1824 1825 Level: beginner 1826 1827 .seealso: MatCreateMPISBAIJ 1828 M*/ 1829 1830 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatCreate_MPISBAIJ" 1834 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1835 { 1836 Mat_MPISBAIJ *b; 1837 PetscErrorCode ierr; 1838 PetscBool flg = PETSC_FALSE; 1839 1840 PetscFunctionBegin; 1841 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1842 B->data = (void*)b; 1843 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1844 1845 B->ops->destroy = MatDestroy_MPISBAIJ; 1846 B->ops->view = MatView_MPISBAIJ; 1847 B->assembled = PETSC_FALSE; 1848 B->insertmode = NOT_SET_VALUES; 1849 1850 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1851 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1852 1853 /* build local table of row and column ownerships */ 1854 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1855 1856 /* build cache for off array entries formed */ 1857 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1858 1859 b->donotstash = PETSC_FALSE; 1860 b->colmap = NULL; 1861 b->garray = NULL; 1862 b->roworiented = PETSC_TRUE; 1863 1864 /* stuff used in block assembly */ 1865 b->barray = 0; 1866 1867 /* stuff used for matrix vector multiply */ 1868 b->lvec = 0; 1869 b->Mvctx = 0; 1870 b->slvec0 = 0; 1871 b->slvec0b = 0; 1872 b->slvec1 = 0; 1873 b->slvec1a = 0; 1874 b->slvec1b = 0; 1875 b->sMvctx = 0; 1876 1877 /* stuff for MatGetRow() */ 1878 b->rowindices = 0; 1879 b->rowvalues = 0; 1880 b->getrowactive = PETSC_FALSE; 1881 1882 /* hash table stuff */ 1883 b->ht = 0; 1884 b->hd = 0; 1885 b->ht_size = 0; 1886 b->ht_flag = PETSC_FALSE; 1887 b->ht_fact = 0; 1888 b->ht_total_ct = 0; 1889 b->ht_insert_ct = 0; 1890 1891 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1892 b->ijonly = PETSC_FALSE; 1893 1894 b->in_loc = 0; 1895 b->v_loc = 0; 1896 b->n_loc = 0; 1897 1898 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1899 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1900 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1901 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1902 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1903 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1904 #if defined(PETSC_HAVE_ELEMENTAL) 1905 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1906 #endif 1907 1908 B->symmetric = PETSC_TRUE; 1909 B->structurally_symmetric = PETSC_TRUE; 1910 B->symmetric_set = PETSC_TRUE; 1911 B->structurally_symmetric_set = PETSC_TRUE; 1912 1913 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1914 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1915 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the local submatrix
          (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
          for the diagonal entry and set a value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix that is right of the diagonal
          (possibly different for each block row) or NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
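   As a usage sketch (the global size and per-row counts below are illustrative
   assumptions, not recommendations), a 100x100 matrix with 2x2 blocks could be
   preallocated with
.vb
      Mat B;
      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,100,100);  /* global size must be divisible by bs */
      MatSetType(B,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(B,2,5,NULL,2,NULL);    /* bs=2, d_nz=5, o_nz=2 */
.ve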
1986 1987 The user can specify preallocated storage for the diagonal part of 1988 the local submatrix with either d_nz or d_nnz (not both). Set 1989 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1990 memory allocation. Likewise, specify preallocated storage for the 1991 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1992 1993 You can call MatGetInfo() to get information on how effective the preallocation was; 1994 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1995 You can also run with the option -info and look for messages with the string 1996 malloc in them to see if additional memory allocation was needed. 1997 1998 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1999 the figure below we depict these three local rows and all columns (0-11). 2000 2001 .vb 2002 0 1 2 3 4 5 6 7 8 9 10 11 2003 -------------------------- 2004 row 3 |. . . d d d o o o o o o 2005 row 4 |. . . d d d o o o o o o 2006 row 5 |. . . d d d o o o o o o 2007 -------------------------- 2008 .ve 2009 2010 Thus, any entries in the d locations are stored in the d (diagonal) 2011 submatrix, and any entries in the o locations are stored in the 2012 o (off-diagonal) submatrix. Note that the d matrix is stored in 2013 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2014 2015 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2016 plus the diagonal part of the d matrix, 2017 and o_nz should indicate the number of block nonzeros per row in the o matrix 2018 2019 In general, for PDE problems in which most nonzeros are near the diagonal, 2020 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2021 or you will get TERRIBLE performance; see the users' manual chapter on 2022 matrices. 2023 2024 Level: intermediate 2025 2026 .keywords: matrix, block, aij, compressed row, sparse, parallel 2027 2028 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2029 @*/ 2030 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2031 { 2032 PetscErrorCode ierr; 2033 2034 PetscFunctionBegin; 2035 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2036 PetscValidType(B,1); 2037 PetscValidLogicalCollectiveInt(B,bs,2); 2038 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatCreateSBAIJ" 2044 /*@C 2045 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2046 (block compressed row). For good matrix assembly performance 2047 the user should preallocate the matrix storage by setting the parameters 2048 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2049 performance can be increased by more than a factor of 50. 2050 2051 Collective on MPI_Comm 2052 2053 Input Parameters: 2054 + comm - MPI communicator 2055 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2056 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2057 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular portion of the diagonal portion of the local submatrix
          (possibly different for each block row) or NULL.
          If you plan to factor the matrix you must leave room for the diagonal entry and
          set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3 |. . . d d d o o o o o o
   row 4 |. . . d d d o o o o o o
   row 5 |. . .
d d d o o o o o o 2125 -------------------------- 2126 .ve 2127 2128 Thus, any entries in the d locations are stored in the d (diagonal) 2129 submatrix, and any entries in the o locations are stored in the 2130 o (off-diagonal) submatrix. Note that the d matrix is stored in 2131 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2132 2133 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2134 plus the diagonal part of the d matrix, 2135 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2136 In general, for PDE problems in which most nonzeros are near the diagonal, 2137 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2138 or you will get TERRIBLE performance; see the users' manual chapter on 2139 matrices. 2140 2141 Level: intermediate 2142 2143 .keywords: matrix, block, aij, compressed row, sparse, parallel 2144 2145 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2146 @*/ 2147 2148 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2149 { 2150 PetscErrorCode ierr; 2151 PetscMPIInt size; 2152 2153 PetscFunctionBegin; 2154 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2155 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2156 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2157 if (size > 1) { 2158 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2159 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2160 } else { 2161 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2162 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2170 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2171 { 2172 Mat mat; 2173 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2174 PetscErrorCode ierr; 2175 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2176 PetscScalar *array; 2177 2178 PetscFunctionBegin; 2179 *newmat = 0; 2180 2181 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2182 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2183 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2184 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2185 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2186 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2187 2188 mat->factortype = matin->factortype; 2189 mat->preallocated = PETSC_TRUE; 2190 mat->assembled = PETSC_TRUE; 2191 mat->insertmode = NOT_SET_VALUES; 2192 2193 a = (Mat_MPISBAIJ*)mat->data; 2194 a->bs2 = oldmat->bs2; 2195 a->mbs = oldmat->mbs; 2196 a->nbs = oldmat->nbs; 2197 a->Mbs = oldmat->Mbs; 2198 a->Nbs = oldmat->Nbs; 2199 2200 2201 a->size = oldmat->size; 2202 a->rank = oldmat->rank; 2203 a->donotstash = oldmat->donotstash; 2204 a->roworiented = oldmat->roworiented; 2205 a->rowindices = 0; 2206 a->rowvalues = 0; 2207 a->getrowactive = PETSC_FALSE; 2208 a->barray = 0; 2209 a->rstartbs = oldmat->rstartbs; 2210 a->rendbs = oldmat->rendbs; 2211 a->cstartbs = oldmat->cstartbs; 2212 a->cendbs = oldmat->cendbs; 2213 2214 /* hash table stuff */ 2215 a->ht = 0; 2216 a->hd = 0; 2217 a->ht_size = 0; 2218 a->ht_flag = oldmat->ht_flag; 2219 a->ht_fact 
= oldmat->ht_fact; 2220 a->ht_total_ct = 0; 2221 a->ht_insert_ct = 0; 2222 2223 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2224 if (oldmat->colmap) { 2225 #if defined(PETSC_USE_CTABLE) 2226 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2227 #else 2228 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2229 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2230 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2231 #endif 2232 } else a->colmap = 0; 2233 2234 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2235 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2236 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2237 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2238 } else a->garray = 0; 2239 2240 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2241 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2242 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2243 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2244 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2245 2246 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2247 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2248 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2249 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2250 2251 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2252 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2253 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2254 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2255 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2256 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2257 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2258 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2259 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2260 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2261 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2262 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2263 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2264 2265 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2266 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2267 a->sMvctx = oldmat->sMvctx; 2268 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2269 2270 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2271 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2272 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2273 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2274 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2275 *newmat = mat; 2276 PetscFunctionReturn(0); 2277 } 2278 2279 #undef __FUNCT__ 2280 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2281 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2282 { 2283 PetscErrorCode ierr; 2284 PetscInt i,nz,j,rstart,rend; 2285 PetscScalar *vals,*buf; 2286 MPI_Comm comm; 2287 MPI_Status status; 2288 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2289 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2290 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2291 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2292 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2293 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2294 int fd; 2295 2296 PetscFunctionBegin; 2297 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2298 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2299 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2300 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2301 if (bs < 0) bs = 1; 2302 2303 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2304 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2305 if (!rank) { 2306 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2307 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2308 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2309 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2310 } 2311 2312 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2313 2314 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2315 M = header[1]; 2316 N = header[2]; 2317 2318 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2319 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2320 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2321 2322 /* If global sizes are set, check if they are consistent with that given in the file */ 2323 if (sizesset) { 2324 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2325 } 2326 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2327 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2328 2329 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2330 2331 /* 2332 This code adds extra rows to make sure the number of rows is 2333 divisible by the blocksize 2334 */ 2335 Mbs = M/bs; 2336 extra_rows = bs - M + bs*(Mbs); 2337 if (extra_rows == bs) extra_rows = 0; 2338 else Mbs++; 2339 if (extra_rows &&!rank) { 2340 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2341 } 2342 2343 /* determine ownership of all rows */ 2344 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2345 mbs = Mbs/size + ((Mbs % size) > rank); 2346 m = mbs*bs; 2347 } else { /* User Set */ 2348 m = newmat->rmap->n; 2349 mbs = m/bs; 2350 } 2351 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2352 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2353 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2354 rowners[0] = 0; 2355 for (i=2; i<=size; i++) 
rowners[i] += rowners[i-1]; 2356 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2357 rstart = rowners[rank]; 2358 rend = rowners[rank+1]; 2359 2360 /* distribute row lengths to all processors */ 2361 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2362 if (!rank) { 2363 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2364 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2365 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2366 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2367 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2368 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2369 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2370 } else { 2371 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2372 } 2373 2374 if (!rank) { /* procs[0] */ 2375 /* calculate the number of nonzeros on each processor */ 2376 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2377 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2378 for (i=0; i<size; i++) { 2379 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2380 procsnz[i] += rowlengths[j]; 2381 } 2382 } 2383 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2384 2385 /* determine max buffer needed and allocate it */ 2386 maxnz = 0; 2387 for (i=0; i<size; i++) { 2388 maxnz = PetscMax(maxnz,procsnz[i]); 2389 } 2390 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2391 2392 /* read in my part of the matrix column indices */ 2393 nz = procsnz[0]; 2394 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2395 mycols = ibuf; 2396 if (size == 1) nz -= extra_rows; 2397 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2398 if (size == 1) { 2399 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2400 } 2401 2402 /* read in every ones (except the last) and ship off */ 2403 for (i=1; i<size-1; i++) { 2404 nz = procsnz[i]; 2405 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2406 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2407 } 2408 /* read in the stuff for the last proc */ 2409 if (size != 1) { 2410 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2411 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2412 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2413 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2414 } 2415 ierr = PetscFree(cols);CHKERRQ(ierr); 2416 } else { /* procs[i], i>0 */ 2417 /* determine buffer space needed for message */ 2418 nz = 0; 2419 for (i=0; i<m; i++) nz += locrowlens[i]; 2420 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2421 mycols = ibuf; 2422 /* receive message of column indices*/ 2423 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2424 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2425 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2426 } 2427 2428 /* loop over local rows, determining number of off diagonal entries */ 2429 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2430 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2431 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2432 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2433 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2434 rowcount = 0; 2435 nzcount = 0; 2436 for (i=0; i<mbs; i++) { 2437 dcount = 0; 
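/* for block row i: count the distinct block columns seen in this block row,
   separating those inside the diagonal block-column range [rstart,rend) (dcount)
   from those outside it (odcount) */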
2438 odcount = 0; 2439 for (j=0; j<bs; j++) { 2440 kmax = locrowlens[rowcount]; 2441 for (k=0; k<kmax; k++) { 2442 tmp = mycols[nzcount++]/bs; /* block col. index */ 2443 if (!mask[tmp]) { 2444 mask[tmp] = 1; 2445 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2446 else masked1[dcount++] = tmp; /* entry in diag portion */ 2447 } 2448 } 2449 rowcount++; 2450 } 2451 2452 dlens[i] = dcount; /* d_nzz[i] */ 2453 odlens[i] = odcount; /* o_nzz[i] */ 2454 2455 /* zero out the mask elements we set */ 2456 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2457 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2458 } 2459 if (!sizesset) { 2460 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2461 } 2462 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2463 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2464 2465 if (!rank) { 2466 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2467 /* read in my part of the matrix numerical values */ 2468 nz = procsnz[0]; 2469 vals = buf; 2470 mycols = ibuf; 2471 if (size == 1) nz -= extra_rows; 2472 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2473 if (size == 1) { 2474 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2475 } 2476 2477 /* insert into matrix */ 2478 jj = rstart*bs; 2479 for (i=0; i<m; i++) { 2480 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2481 mycols += locrowlens[i]; 2482 vals += locrowlens[i]; 2483 jj++; 2484 } 2485 2486 /* read in other processors (except the last one) and ship out */ 2487 for (i=1; i<size-1; i++) { 2488 nz = procsnz[i]; 2489 vals = buf; 2490 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2491 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2492 } 2493 /* the last proc */ 2494 if (size != 1) { 2495 nz = procsnz[i] - extra_rows; 2496 vals = buf; 2497 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2498 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2499 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2500 } 2501 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2502 2503 } else { 2504 /* receive numeric values */ 2505 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2506 2507 /* receive message of values*/ 2508 vals = buf; 2509 mycols = ibuf; 2510 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2511 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2512 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2513 2514 /* insert into matrix */ 2515 jj = rstart*bs; 2516 for (i=0; i<m; i++) { 2517 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2518 mycols += locrowlens[i]; 2519 vals += locrowlens[i]; 2520 jj++; 2521 } 2522 } 2523 2524 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2525 ierr = PetscFree(buf);CHKERRQ(ierr); 2526 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2527 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2528 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2529 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2530 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2531 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2537 /*XXXXX@ 
2538 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2539 2540 Input Parameters: 2541 . mat - the matrix 2542 . fact - factor 2543 2544 Not Collective on Mat, each process can have a different hash factor 2545 2546 Level: advanced 2547 2548 Notes: 2549 This can also be set by the command line option: -mat_use_hash_table fact 2550 2551 .keywords: matrix, hashtable, factor, HT 2552 2553 .seealso: MatSetOption() 2554 @XXXXX*/ 2555 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2559 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2560 { 2561 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2562 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2563 PetscReal atmp; 2564 PetscReal *work,*svalues,*rvalues; 2565 PetscErrorCode ierr; 2566 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2567 PetscMPIInt rank,size; 2568 PetscInt *rowners_bs,dest,count,source; 2569 PetscScalar *va; 2570 MatScalar *ba; 2571 MPI_Status stat; 2572 2573 PetscFunctionBegin; 2574 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2575 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2576 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2577 2578 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2579 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2580 2581 bs = A->rmap->bs; 2582 mbs = a->mbs; 2583 Mbs = a->Mbs; 2584 ba = b->a; 2585 bi = b->i; 2586 bj = b->j; 2587 2588 /* find ownerships */ 2589 rowners_bs = A->rmap->range; 2590 2591 /* each proc creates an array to be distributed */ 2592 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2593 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2594 2595 /* row_max for B */ 2596 if (rank != size-1) { 2597 for (i=0; i<mbs; i++) { 2598 ncols = bi[1] - bi[0]; bi++; 2599 brow = bs*i; 2600 for (j=0; j<ncols; j++) { 2601 bcol = bs*(*bj); 2602 for (kcol=0; kcol<bs; kcol++) { 2603 col = bcol + kcol; /* local col index */ 2604 col += rowners_bs[rank+1]; /* global col index */ 2605 for (krow=0; krow<bs; krow++) { 2606 atmp = PetscAbsScalar(*ba); ba++; 2607 row = brow + krow; /* local row index */ 2608 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2609 if (work[col] < atmp) work[col] = atmp; 2610 } 2611 } 2612 bj++; 2613 } 2614 } 2615 2616 /* send values to its owners */ 2617 for (dest=rank+1; dest<size; dest++) { 2618 svalues = work + rowners_bs[dest]; 2619 count = rowners_bs[dest+1]-rowners_bs[dest]; 2620 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2621 } 2622 } 2623 2624 /* receive values */ 2625 if (rank) { 2626 rvalues = work; 2627 count = rowners_bs[rank+1]-rowners_bs[rank]; 2628 for (source=0; source<rank; source++) { 2629 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2630 /* process values */ 2631 for (i=0; i<count; i++) { 2632 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2633 } 2634 } 2635 } 2636 2637 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2638 ierr = PetscFree(work);CHKERRQ(ierr); 2639 PetscFunctionReturn(0); 2640 } 2641 2642 #undef __FUNCT__ 2643 #define __FUNCT__ "MatSOR_MPISBAIJ" 2644 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2645 { 2646 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2647 PetscErrorCode ierr; 2648 PetscInt 
mbs=mat->mbs,bs=matin->rmap->bs; 2649 PetscScalar *x,*ptr,*from; 2650 Vec bb1; 2651 const PetscScalar *b; 2652 2653 PetscFunctionBegin; 2654 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2655 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2656 2657 if (flag == SOR_APPLY_UPPER) { 2658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2663 if (flag & SOR_ZERO_INITIAL_GUESS) { 2664 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2665 its--; 2666 } 2667 2668 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2669 while (its--) { 2670 2671 /* lower triangular part: slvec0b = - B^T*xx */ 2672 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2673 2674 /* copy xx into slvec0a */ 2675 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2676 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2677 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2678 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2679 2680 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2681 2682 /* copy bb into slvec1a */ 2683 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2684 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2685 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2686 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2687 2688 /* set slvec1b = 0 */ 2689 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2690 2691 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2693 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2694 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2695 2696 /* upper triangular part: bb1 = bb1 - B*x */ 2697 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2698 2699 /* local diagonal sweep */ 2700 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2701 } 2702 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2703 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2704 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2705 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2706 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2707 } else if (flag & SOR_EISENSTAT) { 2708 Vec xx1; 2709 PetscBool hasop; 2710 const PetscScalar *diag; 2711 PetscScalar *sl,scale = (omega - 2.0)/omega; 2712 PetscInt i,n; 2713 2714 if (!mat->xx1) { 2715 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2716 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2717 } 2718 xx1 = mat->xx1; 2719 bb1 = mat->bb1; 2720 2721 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2722 2723 if (!mat->diag) { 2724 /* this is wrong for same matrix with new nonzero values */ 2725 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2726 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2727 } 2728 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2729 2730 
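/* Eisenstat trick: form slvec1a = bb + ((omega-2)/omega)*D*xx, using the matrix's
   MatMultDiagonalBlock() when it provides one, otherwise the stored diagonal vector */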
if (hasop) { 2731 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2732 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2733 } else { 2734 /* 2735 These two lines are replaced by code that may be a bit faster for a good compiler 2736 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2737 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2738 */ 2739 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2740 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2741 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2742 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2743 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2744 if (omega == 1.0) { 2745 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2746 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2747 } else { 2748 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2749 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2750 } 2751 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2752 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2753 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2754 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2755 } 2756 2757 /* multiply off-diagonal portion of matrix */ 2758 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2759 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2760 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2761 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2762 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2763 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2764 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2765 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2766 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2767 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2768 2769 /* local sweep */ 2770 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2771 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2772 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2778 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2779 { 2780 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2781 PetscErrorCode ierr; 2782 Vec lvec1,bb1; 2783 2784 PetscFunctionBegin; 2785 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2786 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2787 2788 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2789 if (flag & SOR_ZERO_INITIAL_GUESS) { 2790 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2791 its--; 2792 } 2793 2794 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2795 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2796 while (its--) { 2797 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2798 2799 /* lower diagonal part: bb1 = bb - B^T*xx */ 2800 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2801 ierr = 
VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based; for each local row, i[row] gives the offset into the
     local j array at which that row's column indices begin.
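       As an illustrative sketch (these arrays are an example assumption, not part of the
     original page): with bs=1 on a single process, the upper triangle of the symmetric
     2x2 matrix [4 -1; -1 4] would be passed as
.vb
      Mat         A;
      PetscInt    i[] = {0,2,3};           /* row 0 holds 2 entries, row 1 holds 1 */
      PetscInt    j[] = {0,1,1};           /* upper-triangular column indices */
      PetscScalar a[] = {4.0,-1.0,4.0};
      MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve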

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
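    /* for each block row of the upper triangle: fetch the (non-blocked) row, convert its
       column indices to block indices, and record them in the d_nnz/o_nnz preallocation counts */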
2937 for (i=0; i<mbs; i++) { 2938 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2939 nnz = nnz/bs; 2940 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2941 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2942 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2943 } 2944 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2945 ierr = PetscFree(bindx);CHKERRQ(ierr); 2946 2947 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2948 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2949 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2950 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2951 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2952 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2953 } 2954 2955 /* numeric phase */ 2956 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2957 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2958 2959 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2960 for (i=0; i<m; i++) { 2961 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2962 Ii = i + rstart; 2963 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2964 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2965 } 2966 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2967 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971
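/*
   Usage sketch (not part of the original source; the global size and preallocation counts
   below are illustrative assumptions): create a distributed SBAIJ matrix with 2x2 blocks,
   insert one diagonal block on the process that owns block row 0, and assemble:

      Mat         A;
      PetscInt    row = 0,col = 0;
      PetscScalar v[4] = {4.0,-1.0,-1.0,4.0};
      MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,3,NULL,1,NULL,&A);
      MatSetValuesBlocked(A,1,&row,1,&col,v,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(&A);
*/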