1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 #if defined(PETSC_HAVE_ELEMENTAL) 23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 24 #endif 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 28 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 29 { 30 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 35 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 36 PetscFunctionReturn(0); 37 } 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 53 { \ 54 \ 55 brow = row/bs; \ 56 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 57 rmax = aimax[brow]; nrow = ailen[brow]; \ 58 bcol = col/bs; \ 59 ridx = row % bs; cidx = col % bs; \ 60 low = 0; high = nrow; \ 61 while (high-low > 3) { \ 62 t = (low+high)/2; \ 63 if (rp[t] > bcol) high = t; \ 64 else low = t; \ 65 } \ 66 for (_i=low; _i<high; _i++) { \ 67 if (rp[_i] > bcol) break; \ 68 if (rp[_i] == bcol) { \ 69 bap = ap + bs2*_i + bs*cidx + ridx; \ 70 if (addv == ADD_VALUES) *bap += value; \ 71 else *bap = value; \ 72 goto a_noinsert; \ 73 } \ 74 } \ 75 if (a->nonew == 1) goto a_noinsert; \ 76 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 77 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 78 N = nrow++ - 1; \ 79 /* shift up all the later entries in this row */ \ 80 for (ii=N; ii>=_i; ii--) { \ 81 rp[ii+1] = rp[ii]; \ 82 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 83 } \ 84 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 85 rp[_i] = bcol; \ 86 ap[bs2*_i + bs*cidx + ridx] = value; \ 87 A->nonzerostate++;\ 88 a_noinsert:; \ 89 ailen[brow] = nrow; \ 90 } 91 92 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 93 { \ 94 brow = row/bs; \ 95 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 96 rmax = bimax[brow]; nrow = bilen[brow]; \ 97 bcol = col/bs; \ 98 ridx = row % bs; cidx = col % bs; \ 99 low = 0; high = nrow; \ 100 while (high-low > 3) { \ 101 t = (low+high)/2; \ 102 if (rp[t] > bcol) high = t; \ 103 else low = t; \ 104 } \ 105 for (_i=low; _i<high; _i++) { \ 106 if (rp[_i] > bcol) break; \ 107 if (rp[_i] == bcol) { \ 108 bap = ap + bs2*_i + bs*cidx + ridx; \ 109 if (addv == ADD_VALUES) *bap += value; \ 110 else *bap = value; \ 111 goto b_noinsert; \ 112 } \ 113 } \ 114 if (b->nonew == 1) goto b_noinsert; \ 115 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 116 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 117 N = nrow++ - 1; \ 118 /* shift up all the later entries in this row */ \ 119 for (ii=N; ii>=_i; ii--) { \ 120 rp[ii+1] = rp[ii]; \ 121 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 } \ 123 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 124 rp[_i] = bcol; \ 125 ap[bs2*_i + bs*cidx + ridx] = value; \ 126 B->nonzerostate++;\ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa =a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba =b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = NULL; 164 MatScalar *v_loc = NULL; 165 166 PetscFunctionBegin; 167 if (!baij->donotstash) { 168 if (n > baij->n_loc) { 169 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 170 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 171 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 173 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 192 } 193 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 194 col = in[j] - cstart_orig; /* local col index */ 195 brow = row/bs; bcol = col/bs; 196 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 197 if (roworiented) value = v[i*n+j]; 198 else value = v[i+j*m]; 199 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 200 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 201 } else if (in[j] < 0) continue; 202 #if defined(PETSC_USE_DEBUG) 203 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 204 #endif 205 else { /* off-diag entry (B) */ 206 if (mat->was_assembled) { 207 if (!baij->colmap) { 208 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 209 } 210 #if defined(PETSC_USE_CTABLE) 211 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 212 col = col - 1; 213 #else 214 col = baij->colmap[in[j]/bs] - 1; 215 #endif 216 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 217 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 218 col = in[j]; 219 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 220 B = baij->B; 221 b = (Mat_SeqBAIJ*)(B)->data; 222 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 223 ba = b->a; 224 } else col += in[j]%bs; 225 } else col = in[j]; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 mat->assembled = PETSC_FALSE; 236 n_loc = 0; 237 for (j=0; j<n; j++) { 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray =baij->barray; 261 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if (!barray) { 269 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (im[i] > in[j]) { 287 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 288 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 289 } 290 /* If NumCol = 1 then a copy is not required */ 291 if ((roworiented) && (n == 1)) { 292 barray = (MatScalar*) v + i*bs2; 293 } else if ((!roworiented) && (m == 1)) { 294 barray = (MatScalar*) v + j*bs2; 295 } else { /* Here a copy is required */ 296 if (roworiented) { 297 value = v + i*(stepval+bs)*bs + j*bs; 298 } else { 299 value = v + j*(stepval+bs)*bs + i*bs; 300 } 301 for (ii=0; ii<bs; ii++,value+=stepval) { 302 for (jj=0; jj<bs; jj++) { 303 *barray++ = *value++; 304 } 305 } 306 barray -=bs2; 307 } 308 309 if (in[j] >= cstart && in[j] < cend) { 310 col = in[j] - cstart; 311 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 312 } else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined(PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined(PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend) { 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined(PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data--; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 399 } 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatNorm_MPISBAIJ" 405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 406 { 407 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscReal sum[2],*lnorm2; 410 411 PetscFunctionBegin; 412 if (baij->size == 1) { 413 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 414 } else { 415 if (type == NORM_FROBENIUS) { 416 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 417 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 418 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 419 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 421 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 422 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 423 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 424 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 425 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 426 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 427 PetscReal *rsum,*rsum2,vabs; 428 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 429 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 430 MatScalar *v; 431 432 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 433 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 434 /* Amat */ 435 v = amat->a; jj = amat->j; 436 for (brow=0; brow<mbs; brow++) { 437 grow = bs*(rstart + brow); 438 nz = amat->i[brow+1] - amat->i[brow]; 439 for (bcol=0; bcol<nz; bcol++) { 440 gcol = bs*(rstart + *jj); jj++; 441 for (col=0; col<bs; col++) { 442 for (row=0; row<bs; row++) { 443 vabs = PetscAbsScalar(*v); v++; 444 rsum[gcol+col] += vabs; 445 /* non-diagonal block */ 446 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 /* Bmat */ 452 v = bmat->a; jj = bmat->j; 453 for (brow=0; brow<mbs; brow++) { 454 grow = bs*(rstart + brow); 455 nz = bmat->i[brow+1] - bmat->i[brow]; 456 for (bcol=0; bcol<nz; bcol++) { 457 gcol = bs*garray[*jj]; jj++; 458 for (col=0; col<bs; col++) { 459 for (row=0; row<bs; row++) { 460 vabs = PetscAbsScalar(*v); v++; 461 rsum[gcol+col] += vabs; 462 rsum[grow+row] += vabs; 463 } 464 } 465 } 466 } 467 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 468 *norm = 0.0; 469 for (col=0; col<mat->cmap->N; col++) { 470 if (rsum2[col] > *norm) *norm = rsum2[col]; 471 } 472 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 473 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 483 PetscErrorCode ierr; 484 PetscInt nstash,reallocs; 485 InsertMode addv; 486 487 PetscFunctionBegin; 488 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 489 490 /* make sure all processors are either in INSERTMODE or ADDMODE */ 491 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 492 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 493 mat->insertmode = addv; /* in case this processor had no cache */ 494 495 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 496 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 497 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 498 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 499 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 500 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 507 { 508 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 509 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 510 PetscErrorCode ierr; 511 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 512 PetscInt *row,*col; 513 PetscBool other_disassembled; 514 PetscMPIInt n; 515 PetscBool r1,r2,r3; 516 MatScalar *val; 517 InsertMode addv = mat->insertmode; 518 519 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 520 PetscFunctionBegin; 521 if (!baij->donotstash && !mat->nooffprocentries) { 522 while (1) { 523 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 524 if (!flg) break; 525 526 for (i=0; i<n;) { 527 /* Now identify the consecutive vals belonging to the same row */ 528 for (j=i,rstart=row[j]; j<n; j++) { 529 if (row[j] != rstart) break; 530 } 531 if (j < n) ncols = j-i; 532 else ncols = n-i; 533 /* Now assemble all these values with a single function call */ 534 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 535 i = j; 536 } 537 } 538 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 539 /* Now process the block-stash. Since the values are stashed column-oriented, 540 set the roworiented flag to column oriented, and after MatSetValues() 541 restore the original flags */ 542 r1 = baij->roworiented; 543 r2 = a->roworiented; 544 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 545 546 baij->roworiented = PETSC_FALSE; 547 a->roworiented = PETSC_FALSE; 548 549 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 550 while (1) { 551 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 552 if (!flg) break; 553 554 for (i=0; i<n;) { 555 /* Now identify the consecutive vals belonging to the same row */ 556 for (j=i,rstart=row[j]; j<n; j++) { 557 if (row[j] != rstart) break; 558 } 559 if (j < n) ncols = j-i; 560 else ncols = n-i; 561 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 562 i = j; 563 } 564 } 565 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 566 567 baij->roworiented = r1; 568 a->roworiented = r2; 569 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 593 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 594 595 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 596 597 baij->rowvalues = 0; 598 599 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 600 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 601 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 602 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 609 #include <petscdraw.h> 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 613 { 614 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt bs = mat->rmap->bs; 617 PetscMPIInt rank = baij->rank; 618 PetscBool iascii,isdraw; 619 PetscViewer sviewer; 620 PetscViewerFormat format; 621 622 PetscFunctionBegin; 623 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 624 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 625 if (iascii) { 626 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 627 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 628 MatInfo info; 629 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 630 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 633 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 634 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 635 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 636 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 637 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 638 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 639 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 640 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } else if (format == PETSC_VIEWER_ASCII_INFO) { 643 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 646 PetscFunctionReturn(0); 647 } 648 } 649 650 if (isdraw) { 651 PetscDraw draw; 652 PetscBool isnull; 653 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 654 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 655 } 656 657 { 658 /* assemble the entire matrix onto first processor. */ 659 Mat A; 660 Mat_SeqSBAIJ *Aloc; 661 Mat_SeqBAIJ *Bloc; 662 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 663 MatScalar *a; 664 665 /* Should this be the same type as mat? */ 666 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 667 if (!rank) { 668 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 669 } else { 670 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 671 } 672 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 673 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 674 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 675 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 676 677 /* copy over the A part */ 678 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 679 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 680 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 681 682 for (i=0; i<mbs; i++) { 683 rvals[0] = bs*(baij->rstartbs + i); 684 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 685 for (j=ai[i]; j<ai[i+1]; j++) { 686 col = (baij->cstartbs+aj[j])*bs; 687 for (k=0; k<bs; k++) { 688 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 689 col++; 690 a += bs; 691 } 692 } 693 } 694 /* copy over the B part */ 695 Bloc = (Mat_SeqBAIJ*)baij->B->data; 696 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 697 for (i=0; i<mbs; i++) { 698 699 rvals[0] = bs*(baij->rstartbs + i); 700 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 701 for (j=ai[i]; j<ai[i+1]; j++) { 702 col = baij->garray[aj[j]]*bs; 703 for (k=0; k<bs; k++) { 704 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 705 col++; 706 a += bs; 707 } 708 } 709 } 710 ierr = PetscFree(rvals);CHKERRQ(ierr); 711 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 712 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 713 /* 714 Everyone has to call to draw the matrix since the graphics waits are 715 synchronized across all processors that share the PetscDraw object 716 */ 717 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 if (!rank) { 719 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 720 } 721 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 722 ierr = MatDestroy(&A);CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatView_MPISBAIJ" 729 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 730 { 731 PetscErrorCode ierr; 732 PetscBool iascii,isdraw,issocket,isbinary; 733 734 PetscFunctionBegin; 735 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 736 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 737 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 739 if (iascii || isdraw || issocket || isbinary) { 740 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 741 } 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatDestroy_MPISBAIJ" 747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 748 { 749 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 #if defined(PETSC_USE_LOG) 754 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 755 #endif 756 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 757 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 758 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 759 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 760 #if defined(PETSC_USE_CTABLE) 761 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 762 #else 763 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 764 #endif 765 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 767 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 768 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 770 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 771 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 772 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 773 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 774 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 775 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 776 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 777 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 778 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 779 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 780 #if defined(PETSC_USE_REAL_MAT_SINGLE) 781 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 782 #endif 783 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 785 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 786 ierr = PetscFree(mat->data);CHKERRQ(ierr); 787 788 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 794 #if defined(PETSC_HAVE_ELEMENTAL) 795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 796 #endif 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 802 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 803 { 804 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 805 PetscErrorCode ierr; 806 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 807 PetscScalar *x,*from; 808 809 PetscFunctionBegin; 810 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 811 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 812 813 /* diagonal part */ 814 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 815 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 816 817 /* subdiagonal part */ 818 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 819 820 /* copy x into the vec slvec0 */ 821 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 822 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 823 824 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 825 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 826 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 827 828 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 830 /* supperdiagonal part */ 831 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 832 PetscFunctionReturn(0); 833 } 834 835 #undef __FUNCT__ 836 #define __FUNCT__ "MatMult_MPISBAIJ" 837 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 838 { 839 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 840 PetscErrorCode ierr; 841 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 842 PetscScalar *x,*from; 843 844 PetscFunctionBegin; 845 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 846 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 847 848 /* diagonal part */ 849 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 850 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 851 852 /* subdiagonal part */ 853 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 854 855 /* copy x into the vec slvec0 */ 856 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 857 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 858 859 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 860 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 861 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 862 863 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 /* supperdiagonal part */ 866 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 872 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 873 { 874 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 875 PetscErrorCode ierr; 876 PetscInt nt; 877 878 PetscFunctionBegin; 879 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 880 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 881 882 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 883 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 884 885 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 /* do diagonal part */ 887 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 888 /* do supperdiagonal part */ 889 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 891 /* do subdiagonal part */ 892 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 893 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 894 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 900 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 901 { 902 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 903 PetscErrorCode ierr; 904 PetscInt mbs=a->mbs,bs=A->rmap->bs; 905 PetscScalar *x,*from,zero=0.0; 906 907 PetscFunctionBegin; 908 /* 909 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 910 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 911 */ 912 /* diagonal part */ 913 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 914 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 915 916 /* subdiagonal part */ 917 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 918 919 /* copy x into the vec slvec0 */ 920 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 921 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 922 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 923 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 924 925 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 927 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 929 /* supperdiagonal part */ 930 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 936 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 937 { 938 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 /* do diagonal part */ 944 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 945 /* do supperdiagonal part */ 946 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 948 949 /* do subdiagonal part */ 950 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 951 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 952 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 /* 957 This only works correctly for square matrices where the subblock A->A is the 958 diagonal block 959 */ 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 963 { 964 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 965 PetscErrorCode ierr; 966 967 PetscFunctionBegin; 968 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 969 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatScale_MPISBAIJ" 975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 976 { 977 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 978 PetscErrorCode ierr; 979 980 PetscFunctionBegin; 981 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 982 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatGetRow_MPISBAIJ" 988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 989 { 990 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 991 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 992 PetscErrorCode ierr; 993 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 994 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 995 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 996 997 PetscFunctionBegin; 998 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 999 mat->getrowactive = PETSC_TRUE; 1000 1001 if (!mat->rowvalues && (idx || v)) { 1002 /* 1003 allocate enough space to hold information from the longest row. 1004 */ 1005 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1006 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1007 PetscInt max = 1,mbs = mat->mbs,tmp; 1008 for (i=0; i<mbs; i++) { 1009 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1010 if (max < tmp) max = tmp; 1011 } 1012 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1013 } 1014 1015 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1016 lrow = row - brstart; /* local row index */ 1017 1018 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1019 if (!v) {pvA = 0; pvB = 0;} 1020 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1021 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1022 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1023 nztot = nzA + nzB; 1024 1025 cmap = mat->garray; 1026 if (v || idx) { 1027 if (nztot) { 1028 /* Sort by increasing column numbers, assuming A and B already sorted */ 1029 PetscInt imark = -1; 1030 if (v) { 1031 *v = v_p = mat->rowvalues; 1032 for (i=0; i<nzB; i++) { 1033 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1034 else break; 1035 } 1036 imark = i; 1037 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1038 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1039 } 1040 if (idx) { 1041 *idx = idx_p = mat->rowindices; 1042 if (imark > -1) { 1043 for (i=0; i<imark; i++) { 1044 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1045 } 1046 } else { 1047 for (i=0; i<nzB; i++) { 1048 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1049 else break; 1050 } 1051 imark = i; 1052 } 1053 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1054 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1055 } 1056 } else { 1057 if (idx) *idx = 0; 1058 if (v) *v = 0; 1059 } 1060 } 1061 *nz = nztot; 1062 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1063 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1069 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1070 { 1071 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1072 1073 PetscFunctionBegin; 1074 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1075 baij->getrowactive = PETSC_FALSE; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1081 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1082 { 1083 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1084 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1085 1086 PetscFunctionBegin; 1087 aA->getrow_utriangular = PETSC_TRUE; 1088 PetscFunctionReturn(0); 1089 } 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1092 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1093 { 1094 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1095 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1096 1097 PetscFunctionBegin; 1098 aA->getrow_utriangular = PETSC_FALSE; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1104 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1105 { 1106 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1111 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1117 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1118 { 1119 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1124 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1130 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1131 { 1132 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1137 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1143 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1144 { 1145 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1146 Mat A = a->A,B = a->B; 1147 PetscErrorCode ierr; 1148 PetscReal isend[5],irecv[5]; 1149 1150 PetscFunctionBegin; 1151 info->block_size = (PetscReal)matin->rmap->bs; 1152 1153 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1154 1155 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1156 isend[3] = info->memory; isend[4] = info->mallocs; 1157 1158 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1159 1160 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1161 isend[3] += info->memory; isend[4] += info->mallocs; 1162 if (flag == MAT_LOCAL) { 1163 info->nz_used = isend[0]; 1164 info->nz_allocated = isend[1]; 1165 info->nz_unneeded = isend[2]; 1166 info->memory = isend[3]; 1167 info->mallocs = isend[4]; 1168 } else if (flag == MAT_GLOBAL_MAX) { 1169 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1170 1171 info->nz_used = irecv[0]; 1172 info->nz_allocated = irecv[1]; 1173 info->nz_unneeded = irecv[2]; 1174 info->memory = irecv[3]; 1175 info->mallocs = irecv[4]; 1176 } else if (flag == MAT_GLOBAL_SUM) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1178 1179 info->nz_used = irecv[0]; 1180 info->nz_allocated = irecv[1]; 1181 info->nz_unneeded = irecv[2]; 1182 info->memory = irecv[3]; 1183 info->mallocs = irecv[4]; 1184 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1185 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1186 info->fill_ratio_needed = 0; 1187 info->factor_mallocs = 0; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1193 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1194 { 1195 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1196 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 switch (op) { 1201 case MAT_NEW_NONZERO_LOCATIONS: 1202 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1203 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1204 case MAT_KEEP_NONZERO_PATTERN: 1205 case MAT_NEW_NONZERO_LOCATION_ERR: 1206 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1208 break; 1209 case MAT_ROW_ORIENTED: 1210 a->roworiented = flg; 1211 1212 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1213 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1214 break; 1215 case MAT_NEW_DIAGONALS: 1216 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1217 break; 1218 case MAT_IGNORE_OFF_PROC_ENTRIES: 1219 a->donotstash = flg; 1220 break; 1221 case MAT_USE_HASH_TABLE: 1222 a->ht_flag = flg; 1223 break; 1224 case MAT_HERMITIAN: 1225 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1226 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1227 1228 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1229 break; 1230 case MAT_SPD: 1231 A->spd_set = PETSC_TRUE; 1232 A->spd = flg; 1233 if (flg) { 1234 A->symmetric = PETSC_TRUE; 1235 A->structurally_symmetric = PETSC_TRUE; 1236 A->symmetric_set = PETSC_TRUE; 1237 A->structurally_symmetric_set = PETSC_TRUE; 1238 } 1239 break; 1240 case MAT_SYMMETRIC: 1241 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1242 break; 1243 case MAT_STRUCTURALLY_SYMMETRIC: 1244 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1245 break; 1246 case MAT_SYMMETRY_ETERNAL: 1247 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1248 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1249 break; 1250 case MAT_IGNORE_LOWER_TRIANGULAR: 1251 aA->ignore_ltriangular = flg; 1252 break; 1253 case MAT_ERROR_LOWER_TRIANGULAR: 1254 aA->ignore_ltriangular = flg; 1255 break; 1256 case MAT_GETROW_UPPERTRIANGULAR: 1257 aA->getrow_utriangular = flg; 1258 break; 1259 default: 1260 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1267 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1268 { 1269 PetscErrorCode ierr; 1270 1271 PetscFunctionBegin; 1272 if (MAT_INITIAL_MATRIX || *B != A) { 1273 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1280 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1281 { 1282 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1283 Mat a = baij->A, b=baij->B; 1284 PetscErrorCode ierr; 1285 PetscInt nv,m,n; 1286 PetscBool flg; 1287 1288 PetscFunctionBegin; 1289 if (ll != rr) { 1290 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1291 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1292 } 1293 if (!ll) PetscFunctionReturn(0); 1294 1295 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1296 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1297 1298 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1299 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1300 1301 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 1303 /* left diagonalscale the off-diagonal part */ 1304 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1305 1306 /* scale the diagonal part */ 1307 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1308 1309 /* right diagonalscale the off-diagonal part */ 1310 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1311 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1317 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1318 { 1319 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatEqual_MPISBAIJ" 1331 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1332 { 1333 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1334 Mat a,b,c,d; 1335 PetscBool flg; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 a = matA->A; b = matA->B; 1340 c = matB->A; d = matB->B; 1341 1342 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1343 if (flg) { 1344 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1345 } 1346 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatCopy_MPISBAIJ" 1352 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1353 { 1354 PetscErrorCode ierr; 1355 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1356 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1357 1358 PetscFunctionBegin; 1359 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1360 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1361 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1362 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1363 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1364 } else { 1365 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1366 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1373 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1374 { 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1384 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1385 { 1386 PetscErrorCode ierr; 1387 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1388 PetscBLASInt bnz,one=1; 1389 Mat_SeqSBAIJ *xa,*ya; 1390 Mat_SeqBAIJ *xb,*yb; 1391 1392 PetscFunctionBegin; 1393 if (str == SAME_NONZERO_PATTERN) { 1394 PetscScalar alpha = a; 1395 xa = (Mat_SeqSBAIJ*)xx->A->data; 1396 ya = (Mat_SeqSBAIJ*)yy->A->data; 1397 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1398 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1399 xb = (Mat_SeqBAIJ*)xx->B->data; 1400 yb = (Mat_SeqBAIJ*)yy->B->data; 1401 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1402 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1403 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1404 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1405 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1406 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1407 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1408 } else { 1409 Mat B; 1410 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1411 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1412 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1413 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1414 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1415 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1416 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1417 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1418 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1419 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1420 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1421 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1422 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1423 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1424 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1425 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1426 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1427 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1428 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1429 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1436 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1437 { 1438 PetscErrorCode ierr; 1439 PetscInt i; 1440 PetscBool flg; 1441 1442 PetscFunctionBegin; 1443 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1444 for (i=0; i<n; i++) { 1445 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1446 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1447 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1448 } 1449 } 1450 PetscFunctionReturn(0); 1451 } 1452 1453 /* -------------------------------------------------------------------*/ 1454 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1455 MatGetRow_MPISBAIJ, 1456 MatRestoreRow_MPISBAIJ, 1457 MatMult_MPISBAIJ, 1458 /* 4*/ MatMultAdd_MPISBAIJ, 1459 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1460 MatMultAdd_MPISBAIJ, 1461 0, 1462 0, 1463 0, 1464 /* 10*/ 0, 1465 0, 1466 0, 1467 MatSOR_MPISBAIJ, 1468 MatTranspose_MPISBAIJ, 1469 /* 15*/ MatGetInfo_MPISBAIJ, 1470 MatEqual_MPISBAIJ, 1471 MatGetDiagonal_MPISBAIJ, 1472 MatDiagonalScale_MPISBAIJ, 1473 MatNorm_MPISBAIJ, 1474 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1475 MatAssemblyEnd_MPISBAIJ, 1476 MatSetOption_MPISBAIJ, 1477 MatZeroEntries_MPISBAIJ, 1478 /* 24*/ 0, 1479 0, 1480 0, 1481 0, 1482 0, 1483 /* 29*/ MatSetUp_MPISBAIJ, 1484 0, 1485 0, 1486 0, 1487 0, 1488 /* 34*/ MatDuplicate_MPISBAIJ, 1489 0, 1490 0, 1491 0, 1492 0, 1493 /* 39*/ MatAXPY_MPISBAIJ, 1494 MatGetSubMatrices_MPISBAIJ, 1495 MatIncreaseOverlap_MPISBAIJ, 1496 MatGetValues_MPISBAIJ, 1497 MatCopy_MPISBAIJ, 1498 /* 44*/ 0, 1499 MatScale_MPISBAIJ, 1500 0, 1501 0, 1502 0, 1503 /* 49*/ 0, 1504 0, 1505 0, 1506 0, 1507 0, 1508 /* 54*/ 0, 1509 0, 1510 MatSetUnfactored_MPISBAIJ, 1511 0, 1512 MatSetValuesBlocked_MPISBAIJ, 1513 /* 59*/ 0, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /* 64*/ 0, 1519 0, 1520 0, 1521 0, 1522 0, 1523 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /* 74*/ 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /* 79*/ 0, 1534 0, 1535 0, 1536 0, 1537 MatLoad_MPISBAIJ, 1538 /* 84*/ 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 /* 89*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /* 94*/ 0, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /* 99*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*104*/ 0, 1559 MatRealPart_MPISBAIJ, 1560 MatImaginaryPart_MPISBAIJ, 1561 MatGetRowUpperTriangular_MPISBAIJ, 1562 MatRestoreRowUpperTriangular_MPISBAIJ, 1563 /*109*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*114*/ 0, 1569 0, 1570 0, 1571 0, 1572 0, 1573 /*119*/ 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*124*/ 0, 1579 0, 1580 0, 1581 0, 1582 0, 1583 /*129*/ 0, 1584 0, 1585 0, 1586 0, 1587 0, 1588 /*134*/ 0, 1589 0, 1590 0, 1591 0, 1592 0, 1593 /*139*/ 0, 1594 0, 1595 0, 1596 0, 1597 0, 1598 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1599 }; 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1603 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1604 { 1605 PetscFunctionBegin; 1606 *a = ((Mat_MPISBAIJ*)A->data)->A; 1607 PetscFunctionReturn(0); 1608 } 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1612 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1613 { 1614 Mat_MPISBAIJ *b; 1615 PetscErrorCode ierr; 1616 PetscInt i,mbs,Mbs; 1617 1618 PetscFunctionBegin; 1619 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1620 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1621 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1622 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1623 1624 b = (Mat_MPISBAIJ*)B->data; 1625 mbs = B->rmap->n/bs; 1626 Mbs = B->rmap->N/bs; 1627 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1628 1629 B->rmap->bs = bs; 1630 b->bs2 = bs*bs; 1631 b->mbs = mbs; 1632 b->Mbs = Mbs; 1633 b->nbs = B->cmap->n/bs; 1634 b->Nbs = B->cmap->N/bs; 1635 1636 for (i=0; i<=b->size; i++) { 1637 b->rangebs[i] = B->rmap->range[i]/bs; 1638 } 1639 b->rstartbs = B->rmap->rstart/bs; 1640 b->rendbs = B->rmap->rend/bs; 1641 1642 b->cstartbs = B->cmap->rstart/bs; 1643 b->cendbs = B->cmap->rend/bs; 1644 1645 if (!B->preallocated) { 1646 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1647 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1648 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1649 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1650 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1651 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1652 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1653 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1654 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1655 } 1656 1657 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1658 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1659 1660 B->preallocated = PETSC_TRUE; 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1666 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1667 { 1668 PetscInt m,rstart,cstart,cend; 1669 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1670 const PetscInt *JJ =0; 1671 PetscScalar *values=0; 1672 PetscErrorCode ierr; 1673 1674 PetscFunctionBegin; 1675 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1676 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1677 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1678 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1679 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1680 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1681 m = B->rmap->n/bs; 1682 rstart = B->rmap->rstart/bs; 1683 cstart = B->cmap->rstart/bs; 1684 cend = B->cmap->rend/bs; 1685 1686 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1687 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1688 for (i=0; i<m; i++) { 1689 nz = ii[i+1] - ii[i]; 1690 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1691 nz_max = PetscMax(nz_max,nz); 1692 JJ = jj + ii[i]; 1693 for (j=0; j<nz; j++) { 1694 if (*JJ >= cstart) break; 1695 JJ++; 1696 } 1697 d = 0; 1698 for (; j<nz; j++) { 1699 if (*JJ++ >= cend) break; 1700 d++; 1701 } 1702 d_nnz[i] = d; 1703 o_nnz[i] = nz - d; 1704 } 1705 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1706 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1707 1708 values = (PetscScalar*)V; 1709 if (!values) { 1710 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1711 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1712 } 1713 for (i=0; i<m; i++) { 1714 PetscInt row = i + rstart; 1715 PetscInt ncols = ii[i+1] - ii[i]; 1716 const PetscInt *icols = jj + ii[i]; 1717 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1718 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1719 } 1720 1721 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1722 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 /*MC 1729 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1730 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1731 the matrix is stored. 1732 1733 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1734 can call MatSetOption(Mat, MAT_HERMITIAN); 1735 1736 Options Database Keys: 1737 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1738 1739 Level: beginner 1740 1741 .seealso: MatCreateMPISBAIJ 1742 M*/ 1743 1744 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatCreate_MPISBAIJ" 1748 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1749 { 1750 Mat_MPISBAIJ *b; 1751 PetscErrorCode ierr; 1752 PetscBool flg = PETSC_FALSE; 1753 1754 PetscFunctionBegin; 1755 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1756 B->data = (void*)b; 1757 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1758 1759 B->ops->destroy = MatDestroy_MPISBAIJ; 1760 B->ops->view = MatView_MPISBAIJ; 1761 B->assembled = PETSC_FALSE; 1762 B->insertmode = NOT_SET_VALUES; 1763 1764 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1765 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1766 1767 /* build local table of row and column ownerships */ 1768 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1769 1770 /* build cache for off array entries formed */ 1771 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1772 1773 b->donotstash = PETSC_FALSE; 1774 b->colmap = NULL; 1775 b->garray = NULL; 1776 b->roworiented = PETSC_TRUE; 1777 1778 /* stuff used in block assembly */ 1779 b->barray = 0; 1780 1781 /* stuff used for matrix vector multiply */ 1782 b->lvec = 0; 1783 b->Mvctx = 0; 1784 b->slvec0 = 0; 1785 b->slvec0b = 0; 1786 b->slvec1 = 0; 1787 b->slvec1a = 0; 1788 b->slvec1b = 0; 1789 b->sMvctx = 0; 1790 1791 /* stuff for MatGetRow() */ 1792 b->rowindices = 0; 1793 b->rowvalues = 0; 1794 b->getrowactive = PETSC_FALSE; 1795 1796 /* hash table stuff */ 1797 b->ht = 0; 1798 b->hd = 0; 1799 b->ht_size = 0; 1800 b->ht_flag = PETSC_FALSE; 1801 b->ht_fact = 0; 1802 b->ht_total_ct = 0; 1803 b->ht_insert_ct = 0; 1804 1805 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1806 b->ijonly = PETSC_FALSE; 1807 1808 b->in_loc = 0; 1809 b->v_loc = 0; 1810 b->n_loc = 0; 1811 1812 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1813 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1814 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1816 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1818 #if defined(PETSC_HAVE_ELEMENTAL) 1819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1820 #endif 1821 1822 B->symmetric = PETSC_TRUE; 1823 B->structurally_symmetric = PETSC_TRUE; 1824 B->symmetric_set = PETSC_TRUE; 1825 B->structurally_symmetric_set = PETSC_TRUE; 1826 1827 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1828 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1829 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 1830 if (flg) { 1831 PetscReal fact = 1.39; 1832 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1833 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1834 if (fact <= 1.0) fact = 1.39; 1835 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1836 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1837 } 1838 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 /*MC 1843 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1844 1845 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1846 and MATMPISBAIJ otherwise. 1847 1848 Options Database Keys: 1849 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1850 1851 Level: beginner 1852 1853 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1854 M*/ 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1858 /*@C 1859 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1860 the user should preallocate the matrix storage by setting the parameters 1861 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1862 performance can be increased by more than a factor of 50. 1863 1864 Collective on Mat 1865 1866 Input Parameters: 1867 + B - the matrix 1868 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1869 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1870 . d_nz - number of block nonzeros per block row in diagonal portion of local 1871 submatrix (same for all local rows) 1872 . d_nnz - array containing the number of block nonzeros in the various block rows 1873 in the upper triangular and diagonal part of the in diagonal portion of the local 1874 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1875 for the diagonal entry and set a value even if it is zero. 1876 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1877 submatrix (same for all local rows). 1878 - o_nnz - array containing the number of nonzeros in the various block rows of the 1879 off-diagonal portion of the local submatrix that is right of the diagonal 1880 (possibly different for each block row) or NULL. 1881 1882 1883 Options Database Keys: 1884 . -mat_no_unroll - uses code that does not unroll the loops in the 1885 block calculations (much slower) 1886 . -mat_block_size - size of the blocks to use 1887 1888 Notes: 1889 1890 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1891 than it must be used on all processors that share the object for that argument. 1892 1893 If the *_nnz parameter is given then the *_nz parameter is ignored 1894 1895 Storage Information: 1896 For a square global matrix we define each processor's diagonal portion 1897 to be its local rows and the corresponding columns (a square submatrix); 1898 each processor's off-diagonal portion encompasses the remainder of the 1899 local matrix (a rectangular submatrix). 1900 1901 The user can specify preallocated storage for the diagonal part of 1902 the local submatrix with either d_nz or d_nnz (not both). Set 1903 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1904 memory allocation. Likewise, specify preallocated storage for the 1905 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1906 1907 You can call MatGetInfo() to get information on how effective the preallocation was; 1908 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1909 You can also run with the option -info and look for messages with the string 1910 malloc in them to see if additional memory allocation was needed. 1911 1912 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1913 the figure below we depict these three local rows and all columns (0-11). 1914 1915 .vb 1916 0 1 2 3 4 5 6 7 8 9 10 11 1917 -------------------------- 1918 row 3 |. . . d d d o o o o o o 1919 row 4 |. . . d d d o o o o o o 1920 row 5 |. . . d d d o o o o o o 1921 -------------------------- 1922 .ve 1923 1924 Thus, any entries in the d locations are stored in the d (diagonal) 1925 submatrix, and any entries in the o locations are stored in the 1926 o (off-diagonal) submatrix. Note that the d matrix is stored in 1927 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1928 1929 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1930 plus the diagonal part of the d matrix, 1931 and o_nz should indicate the number of block nonzeros per row in the o matrix 1932 1933 In general, for PDE problems in which most nonzeros are near the diagonal, 1934 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1935 or you will get TERRIBLE performance; see the users' manual chapter on 1936 matrices. 1937 1938 Level: intermediate 1939 1940 .keywords: matrix, block, aij, compressed row, sparse, parallel 1941 1942 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 1943 @*/ 1944 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1945 { 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1950 PetscValidType(B,1); 1951 PetscValidLogicalCollectiveInt(B,bs,2); 1952 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatCreateSBAIJ" 1958 /*@C 1959 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1960 (block compressed row). For good matrix assembly performance 1961 the user should preallocate the matrix storage by setting the parameters 1962 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1963 performance can be increased by more than a factor of 50. 1964 1965 Collective on MPI_Comm 1966 1967 Input Parameters: 1968 + comm - MPI communicator 1969 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1970 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1971 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1972 This value should be the same as the local size used in creating the 1973 y vector for the matrix-vector product y = Ax. 1974 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1975 This value should be the same as the local size used in creating the 1976 x vector for the matrix-vector product y = Ax. 1977 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1978 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1979 . d_nz - number of block nonzeros per block row in diagonal portion of local 1980 submatrix (same for all local rows) 1981 . d_nnz - array containing the number of block nonzeros in the various block rows 1982 in the upper triangular portion of the in diagonal portion of the local 1983 (possibly different for each block block row) or NULL. 1984 If you plan to factor the matrix you must leave room for the diagonal entry and 1985 set its value even if it is zero. 1986 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1987 submatrix (same for all local rows). 1988 - o_nnz - array containing the number of nonzeros in the various block rows of the 1989 off-diagonal portion of the local submatrix (possibly different for 1990 each block row) or NULL. 1991 1992 Output Parameter: 1993 . A - the matrix 1994 1995 Options Database Keys: 1996 . -mat_no_unroll - uses code that does not unroll the loops in the 1997 block calculations (much slower) 1998 . -mat_block_size - size of the blocks to use 1999 . -mat_mpi - use the parallel matrix data structures even on one processor 2000 (defaults to using SeqBAIJ format on one processor) 2001 2002 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2003 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2004 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2005 2006 Notes: 2007 The number of rows and columns must be divisible by blocksize. 2008 This matrix type does not support complex Hermitian operation. 2009 2010 The user MUST specify either the local or global matrix dimensions 2011 (possibly both). 2012 2013 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2014 than it must be used on all processors that share the object for that argument. 2015 2016 If the *_nnz parameter is given then the *_nz parameter is ignored 2017 2018 Storage Information: 2019 For a square global matrix we define each processor's diagonal portion 2020 to be its local rows and the corresponding columns (a square submatrix); 2021 each processor's off-diagonal portion encompasses the remainder of the 2022 local matrix (a rectangular submatrix). 2023 2024 The user can specify preallocated storage for the diagonal part of 2025 the local submatrix with either d_nz or d_nnz (not both). Set 2026 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2027 memory allocation. Likewise, specify preallocated storage for the 2028 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2029 2030 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2031 the figure below we depict these three local rows and all columns (0-11). 2032 2033 .vb 2034 0 1 2 3 4 5 6 7 8 9 10 11 2035 -------------------------- 2036 row 3 |. . . d d d o o o o o o 2037 row 4 |. . . d d d o o o o o o 2038 row 5 |. . . d d d o o o o o o 2039 -------------------------- 2040 .ve 2041 2042 Thus, any entries in the d locations are stored in the d (diagonal) 2043 submatrix, and any entries in the o locations are stored in the 2044 o (off-diagonal) submatrix. Note that the d matrix is stored in 2045 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2046 2047 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2048 plus the diagonal part of the d matrix, 2049 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2050 In general, for PDE problems in which most nonzeros are near the diagonal, 2051 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2052 or you will get TERRIBLE performance; see the users' manual chapter on 2053 matrices. 2054 2055 Level: intermediate 2056 2057 .keywords: matrix, block, aij, compressed row, sparse, parallel 2058 2059 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2060 @*/ 2061 2062 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2063 { 2064 PetscErrorCode ierr; 2065 PetscMPIInt size; 2066 2067 PetscFunctionBegin; 2068 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2069 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2070 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2071 if (size > 1) { 2072 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2073 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2074 } else { 2075 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2076 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2077 } 2078 PetscFunctionReturn(0); 2079 } 2080 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2084 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2085 { 2086 Mat mat; 2087 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2088 PetscErrorCode ierr; 2089 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2090 PetscScalar *array; 2091 2092 PetscFunctionBegin; 2093 *newmat = 0; 2094 2095 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2096 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2097 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2098 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2099 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2100 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2101 2102 mat->factortype = matin->factortype; 2103 mat->preallocated = PETSC_TRUE; 2104 mat->assembled = PETSC_TRUE; 2105 mat->insertmode = NOT_SET_VALUES; 2106 2107 a = (Mat_MPISBAIJ*)mat->data; 2108 a->bs2 = oldmat->bs2; 2109 a->mbs = oldmat->mbs; 2110 a->nbs = oldmat->nbs; 2111 a->Mbs = oldmat->Mbs; 2112 a->Nbs = oldmat->Nbs; 2113 2114 2115 a->size = oldmat->size; 2116 a->rank = oldmat->rank; 2117 a->donotstash = oldmat->donotstash; 2118 a->roworiented = oldmat->roworiented; 2119 a->rowindices = 0; 2120 a->rowvalues = 0; 2121 a->getrowactive = PETSC_FALSE; 2122 a->barray = 0; 2123 a->rstartbs = oldmat->rstartbs; 2124 a->rendbs = oldmat->rendbs; 2125 a->cstartbs = oldmat->cstartbs; 2126 a->cendbs = oldmat->cendbs; 2127 2128 /* hash table stuff */ 2129 a->ht = 0; 2130 a->hd = 0; 2131 a->ht_size = 0; 2132 a->ht_flag = oldmat->ht_flag; 2133 a->ht_fact = oldmat->ht_fact; 2134 a->ht_total_ct = 0; 2135 a->ht_insert_ct = 0; 2136 2137 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2138 if (oldmat->colmap) { 2139 #if defined(PETSC_USE_CTABLE) 2140 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2141 #else 2142 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2143 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2144 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2145 #endif 2146 } else a->colmap = 0; 2147 2148 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2149 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2150 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2151 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2152 } else a->garray = 0; 2153 2154 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2155 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2156 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2157 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2158 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2159 2160 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2161 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2162 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2163 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2164 2165 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2166 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2167 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2168 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2169 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2170 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2171 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2172 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2173 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2174 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2175 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2176 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2177 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2178 2179 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2180 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2181 a->sMvctx = oldmat->sMvctx; 2182 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2183 2184 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2185 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2186 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2187 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2188 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2189 *newmat = mat; 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatLoad_MPISBAIJ" 2195 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2196 { 2197 PetscErrorCode ierr; 2198 PetscInt i,nz,j,rstart,rend; 2199 PetscScalar *vals,*buf; 2200 MPI_Comm comm; 2201 MPI_Status status; 2202 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2203 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2204 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2205 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2206 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2207 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2208 int fd; 2209 2210 PetscFunctionBegin; 2211 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2212 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2213 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2214 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2215 if (bs < 0) bs = 1; 2216 2217 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2218 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2219 if (!rank) { 2220 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2221 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2222 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2223 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2224 } 2225 2226 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2227 2228 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2229 M = header[1]; 2230 N = header[2]; 2231 2232 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2233 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2234 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2235 2236 /* If global sizes are set, check if they are consistent with that given in the file */ 2237 if (sizesset) { 2238 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2239 } 2240 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2241 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2242 2243 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2244 2245 /* 2246 This code adds extra rows to make sure the number of rows is 2247 divisible by the blocksize 2248 */ 2249 Mbs = M/bs; 2250 extra_rows = bs - M + bs*(Mbs); 2251 if (extra_rows == bs) extra_rows = 0; 2252 else Mbs++; 2253 if (extra_rows &&!rank) { 2254 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2255 } 2256 2257 /* determine ownership of all rows */ 2258 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2259 mbs = Mbs/size + ((Mbs % size) > rank); 2260 m = mbs*bs; 2261 } else { /* User Set */ 2262 m = newmat->rmap->n; 2263 mbs = m/bs; 2264 } 2265 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2266 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2267 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2268 rowners[0] = 0; 2269 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2270 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2271 rstart = rowners[rank]; 2272 rend = rowners[rank+1]; 2273 2274 /* distribute row lengths to all processors */ 2275 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2276 if (!rank) { 2277 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2278 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2279 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2280 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2281 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2282 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2283 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2284 } else { 2285 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2286 } 2287 2288 if (!rank) { /* procs[0] */ 2289 /* calculate the number of nonzeros on each processor */ 2290 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2291 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2292 for (i=0; i<size; i++) { 2293 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2294 procsnz[i] += rowlengths[j]; 2295 } 2296 } 2297 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2298 2299 /* determine max buffer needed and allocate it */ 2300 maxnz = 0; 2301 for (i=0; i<size; i++) { 2302 maxnz = PetscMax(maxnz,procsnz[i]); 2303 } 2304 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2305 2306 /* read in my part of the matrix column indices */ 2307 nz = procsnz[0]; 2308 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2309 mycols = ibuf; 2310 if (size == 1) nz -= extra_rows; 2311 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2312 if (size == 1) { 2313 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2314 } 2315 2316 /* read in every ones (except the last) and ship off */ 2317 for (i=1; i<size-1; i++) { 2318 nz = procsnz[i]; 2319 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2320 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2321 } 2322 /* read in the stuff for the last proc */ 2323 if (size != 1) { 2324 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2325 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2326 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2327 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2328 } 2329 ierr = PetscFree(cols);CHKERRQ(ierr); 2330 } else { /* procs[i], i>0 */ 2331 /* determine buffer space needed for message */ 2332 nz = 0; 2333 for (i=0; i<m; i++) nz += locrowlens[i]; 2334 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2335 mycols = ibuf; 2336 /* receive message of column indices*/ 2337 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2338 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2339 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2340 } 2341 2342 /* loop over local rows, determining number of off diagonal entries */ 2343 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2344 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2345 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2346 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2347 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2348 rowcount = 0; 2349 nzcount = 0; 2350 for (i=0; i<mbs; i++) { 2351 dcount = 0; 2352 odcount = 0; 2353 for (j=0; j<bs; j++) { 2354 kmax = locrowlens[rowcount]; 2355 for (k=0; k<kmax; k++) { 2356 tmp = mycols[nzcount++]/bs; /* block col. index */ 2357 if (!mask[tmp]) { 2358 mask[tmp] = 1; 2359 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2360 else masked1[dcount++] = tmp; /* entry in diag portion */ 2361 } 2362 } 2363 rowcount++; 2364 } 2365 2366 dlens[i] = dcount; /* d_nzz[i] */ 2367 odlens[i] = odcount; /* o_nzz[i] */ 2368 2369 /* zero out the mask elements we set */ 2370 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2371 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2372 } 2373 if (!sizesset) { 2374 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2375 } 2376 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2377 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2378 2379 if (!rank) { 2380 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2381 /* read in my part of the matrix numerical values */ 2382 nz = procsnz[0]; 2383 vals = buf; 2384 mycols = ibuf; 2385 if (size == 1) nz -= extra_rows; 2386 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2387 if (size == 1) { 2388 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2389 } 2390 2391 /* insert into matrix */ 2392 jj = rstart*bs; 2393 for (i=0; i<m; i++) { 2394 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2395 mycols += locrowlens[i]; 2396 vals += locrowlens[i]; 2397 jj++; 2398 } 2399 2400 /* read in other processors (except the last one) and ship out */ 2401 for (i=1; i<size-1; i++) { 2402 nz = procsnz[i]; 2403 vals = buf; 2404 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2405 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2406 } 2407 /* the last proc */ 2408 if (size != 1) { 2409 nz = procsnz[i] - extra_rows; 2410 vals = buf; 2411 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2412 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2413 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2414 } 2415 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2416 2417 } else { 2418 /* receive numeric values */ 2419 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2420 2421 /* receive message of values*/ 2422 vals = buf; 2423 mycols = ibuf; 2424 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2425 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2426 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2427 2428 /* insert into matrix */ 2429 jj = rstart*bs; 2430 for (i=0; i<m; i++) { 2431 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2432 mycols += locrowlens[i]; 2433 vals += locrowlens[i]; 2434 jj++; 2435 } 2436 } 2437 2438 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2439 ierr = PetscFree(buf);CHKERRQ(ierr); 2440 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2441 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2442 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2443 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2444 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2445 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2446 PetscFunctionReturn(0); 2447 } 2448 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2451 /*XXXXX@ 2452 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2453 2454 Input Parameters: 2455 . mat - the matrix 2456 . fact - factor 2457 2458 Not Collective on Mat, each process can have a different hash factor 2459 2460 Level: advanced 2461 2462 Notes: 2463 This can also be set by the command line option: -mat_use_hash_table fact 2464 2465 .keywords: matrix, hashtable, factor, HT 2466 2467 .seealso: MatSetOption() 2468 @XXXXX*/ 2469 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2473 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2474 { 2475 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2476 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2477 PetscReal atmp; 2478 PetscReal *work,*svalues,*rvalues; 2479 PetscErrorCode ierr; 2480 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2481 PetscMPIInt rank,size; 2482 PetscInt *rowners_bs,dest,count,source; 2483 PetscScalar *va; 2484 MatScalar *ba; 2485 MPI_Status stat; 2486 2487 PetscFunctionBegin; 2488 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2489 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2490 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2491 2492 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2493 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2494 2495 bs = A->rmap->bs; 2496 mbs = a->mbs; 2497 Mbs = a->Mbs; 2498 ba = b->a; 2499 bi = b->i; 2500 bj = b->j; 2501 2502 /* find ownerships */ 2503 rowners_bs = A->rmap->range; 2504 2505 /* each proc creates an array to be distributed */ 2506 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2507 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2508 2509 /* row_max for B */ 2510 if (rank != size-1) { 2511 for (i=0; i<mbs; i++) { 2512 ncols = bi[1] - bi[0]; bi++; 2513 brow = bs*i; 2514 for (j=0; j<ncols; j++) { 2515 bcol = bs*(*bj); 2516 for (kcol=0; kcol<bs; kcol++) { 2517 col = bcol + kcol; /* local col index */ 2518 col += rowners_bs[rank+1]; /* global col index */ 2519 for (krow=0; krow<bs; krow++) { 2520 atmp = PetscAbsScalar(*ba); ba++; 2521 row = brow + krow; /* local row index */ 2522 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2523 if (work[col] < atmp) work[col] = atmp; 2524 } 2525 } 2526 bj++; 2527 } 2528 } 2529 2530 /* send values to its owners */ 2531 for (dest=rank+1; dest<size; dest++) { 2532 svalues = work + rowners_bs[dest]; 2533 count = rowners_bs[dest+1]-rowners_bs[dest]; 2534 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2535 } 2536 } 2537 2538 /* receive values */ 2539 if (rank) { 2540 rvalues = work; 2541 count = rowners_bs[rank+1]-rowners_bs[rank]; 2542 for (source=0; source<rank; source++) { 2543 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2544 /* process values */ 2545 for (i=0; i<count; i++) { 2546 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2547 } 2548 } 2549 } 2550 2551 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2552 ierr = PetscFree(work);CHKERRQ(ierr); 2553 PetscFunctionReturn(0); 2554 } 2555 2556 #undef __FUNCT__ 2557 #define __FUNCT__ "MatSOR_MPISBAIJ" 2558 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2559 { 2560 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2561 PetscErrorCode ierr; 2562 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2563 PetscScalar *x,*ptr,*from; 2564 Vec bb1; 2565 const PetscScalar *b; 2566 2567 PetscFunctionBegin; 2568 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2569 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2570 2571 if (flag == SOR_APPLY_UPPER) { 2572 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2577 if (flag & SOR_ZERO_INITIAL_GUESS) { 2578 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2579 its--; 2580 } 2581 2582 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2583 while (its--) { 2584 2585 /* lower triangular part: slvec0b = - B^T*xx */ 2586 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2587 2588 /* copy xx into slvec0a */ 2589 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2591 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2592 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2593 2594 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2595 2596 /* copy bb into slvec1a */ 2597 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2598 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2599 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2600 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2601 2602 /* set slvec1b = 0 */ 2603 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2604 2605 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2606 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2607 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2608 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2609 2610 /* upper triangular part: bb1 = bb1 - B*x */ 2611 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2612 2613 /* local diagonal sweep */ 2614 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2615 } 2616 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2617 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2618 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2619 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2620 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2621 } else if (flag & SOR_EISENSTAT) { 2622 Vec xx1; 2623 PetscBool hasop; 2624 const PetscScalar *diag; 2625 PetscScalar *sl,scale = (omega - 2.0)/omega; 2626 PetscInt i,n; 2627 2628 if (!mat->xx1) { 2629 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2630 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2631 } 2632 xx1 = mat->xx1; 2633 bb1 = mat->bb1; 2634 2635 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2636 2637 if (!mat->diag) { 2638 /* this is wrong for same matrix with new nonzero values */ 2639 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2640 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2641 } 2642 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2643 2644 if (hasop) { 2645 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2646 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2647 } else { 2648 /* 2649 These two lines are replaced by code that may be a bit faster for a good compiler 2650 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2651 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2652 */ 2653 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2654 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2655 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2656 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2657 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2658 if (omega == 1.0) { 2659 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2660 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2661 } else { 2662 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2663 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2664 } 2665 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2666 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2667 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2668 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2669 } 2670 2671 /* multiply off-diagonal portion of matrix */ 2672 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2673 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2674 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2675 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2676 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2677 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2678 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2679 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2680 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2681 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2682 2683 /* local sweep */ 2684 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2685 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2686 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2692 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2693 { 2694 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2695 PetscErrorCode ierr; 2696 Vec lvec1,bb1; 2697 2698 PetscFunctionBegin; 2699 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2700 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2701 2702 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2703 if (flag & SOR_ZERO_INITIAL_GUESS) { 2704 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2705 its--; 2706 } 2707 2708 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2709 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2710 while (its--) { 2711 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2712 2713 /* lower diagonal part: bb1 = bb - B^T*xx */ 2714 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2715 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2716 2717 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2719 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2720 2721 /* upper diagonal part: bb1 = bb1 - B*x */ 2722 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2723 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2724 2725 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2726 2727 /* diagonal sweep */ 2728 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2729 } 2730 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2731 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2732 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2738 /*@ 2739 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2740 CSR format the local rows. 2741 2742 Collective on MPI_Comm 2743 2744 Input Parameters: 2745 + comm - MPI communicator 2746 . bs - the block size, only a block size of 1 is supported 2747 . m - number of local rows (Cannot be PETSC_DECIDE) 2748 . n - This value should be the same as the local size used in creating the 2749 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2750 calculated if N is given) For square matrices n is almost always m. 2751 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2752 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2753 . i - row indices 2754 . j - column indices 2755 - a - matrix values 2756 2757 Output Parameter: 2758 . mat - the matrix 2759 2760 Level: intermediate 2761 2762 Notes: 2763 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2764 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2765 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2766 2767 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2768 2769 .keywords: matrix, aij, compressed row, sparse, parallel 2770 2771 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2772 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2773 @*/ 2774 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2775 { 2776 PetscErrorCode ierr; 2777 2778 2779 PetscFunctionBegin; 2780 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2781 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2782 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2783 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2784 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2785 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2786 PetscFunctionReturn(0); 2787 } 2788 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2792 /*@C 2793 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2794 (the default parallel PETSc format). 2795 2796 Collective on MPI_Comm 2797 2798 Input Parameters: 2799 + B - the matrix 2800 . bs - the block size 2801 . i - the indices into j for the start of each local row (starts with zero) 2802 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2803 - v - optional values in the matrix 2804 2805 Level: developer 2806 2807 .keywords: matrix, aij, compressed row, sparse, parallel 2808 2809 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2810 @*/ 2811 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2812 { 2813 PetscErrorCode ierr; 2814 2815 PetscFunctionBegin; 2816 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2817 PetscFunctionReturn(0); 2818 } 2819 2820 #undef __FUNCT__ 2821 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 2822 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2823 { 2824 PetscErrorCode ierr; 2825 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2826 PetscInt *indx; 2827 PetscScalar *values; 2828 2829 PetscFunctionBegin; 2830 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2831 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2832 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2833 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 2834 PetscInt *bindx,rmax=a->rmax,j; 2835 2836 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2837 mbs = m/bs; Nbs = N/cbs; 2838 if (n == PETSC_DECIDE) { 2839 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 2840 } 2841 /* Check sum(n) = Nbs */ 2842 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2843 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 2844 2845 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2846 rstart -= mbs; 2847 2848 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2849 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 2850 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2851 for (i=0; i<mbs; i++) { 2852 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2853 nnz = nnz/bs; 2854 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2855 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2856 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2857 } 2858 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2859 ierr = PetscFree(bindx);CHKERRQ(ierr); 2860 2861 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2862 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2863 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2864 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2865 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2866 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2867 } 2868 2869 /* numeric phase */ 2870 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2871 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2872 2873 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2874 for (i=0; i<m; i++) { 2875 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2876 Ii = i + rstart; 2877 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2878 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2879 } 2880 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2881 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2882 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2883 PetscFunctionReturn(0); 2884 } 2885