1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash) { 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; 200 else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; 229 else value = v[i+j*m]; 230 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 } 234 } else { /* off processor entry */ 235 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 236 if (!baij->donotstash) { 237 mat->assembled = PETSC_FALSE; 238 n_loc = 0; 239 for (j=0; j<n; j++) { 240 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 241 in_loc[n_loc] = in[j]; 242 if (roworiented) { 243 v_loc[n_loc] = v[i*n+j]; 244 } else { 245 v_loc[n_loc] = v[j*m+i]; 246 } 247 n_loc++; 248 } 249 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 250 } 251 } 252 } 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 259 { 260 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 261 const MatScalar *value; 262 MatScalar *barray=baij->barray; 263 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 264 PetscErrorCode ierr; 265 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 266 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 267 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 268 269 PetscFunctionBegin; 270 if (!barray) { 271 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 272 baij->barray = barray; 273 } 274 275 if (roworiented) { 276 stepval = (n-1)*bs; 277 } else { 278 stepval = (m-1)*bs; 279 } 280 for (i=0; i<m; i++) { 281 if (im[i] < 0) continue; 282 #if defined(PETSC_USE_DEBUG) 283 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 284 #endif 285 if (im[i] >= rstart && im[i] < rend) { 286 row = im[i] - rstart; 287 for (j=0; j<n; j++) { 288 if (im[i] > in[j]) { 289 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 290 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 291 } 292 /* If NumCol = 1 then a copy is not required */ 293 if ((roworiented) && (n == 1)) { 294 barray = (MatScalar*) v + i*bs2; 295 } else if ((!roworiented) && (m == 1)) { 296 barray = (MatScalar*) v + j*bs2; 297 } else { /* Here a copy is required */ 298 if (roworiented) { 299 value = v + i*(stepval+bs)*bs + j*bs; 300 } else { 301 value = v + j*(stepval+bs)*bs + i*bs; 302 } 303 for (ii=0; ii<bs; ii++,value+=stepval) { 304 for (jj=0; jj<bs; jj++) { 305 *barray++ = *value++; 306 } 307 } 308 barray -=bs2; 309 } 310 311 if (in[j] >= cstart && in[j] < cend) { 312 col = in[j] - cstart; 313 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 314 } 315 else if (in[j] < 0) continue; 316 #if defined(PETSC_USE_DEBUG) 317 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 318 #endif 319 else { 320 if (mat->was_assembled) { 321 if (!baij->colmap) { 322 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 323 } 324 325 #if defined(PETSC_USE_DEBUG) 326 #if defined (PETSC_USE_CTABLE) 327 { PetscInt data; 328 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 329 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 } 331 #else 332 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 333 #endif 334 #endif 335 #if defined (PETSC_USE_CTABLE) 336 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 337 col = (col - 1)/bs; 338 #else 339 col = (baij->colmap[in[j]] - 1)/bs; 340 #endif 341 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 342 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 343 col = in[j]; 344 } 345 } 346 else col = in[j]; 347 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 348 } 349 } 350 } else { 351 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 352 if (!baij->donotstash) { 353 if (roworiented) { 354 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 355 } else { 356 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 357 } 358 } 359 } 360 } 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatGetValues_MPISBAIJ" 366 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 367 { 368 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 369 PetscErrorCode ierr; 370 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 371 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 372 373 PetscFunctionBegin; 374 for (i=0; i<m; i++) { 375 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 376 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 377 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 378 row = idxm[i] - bsrstart; 379 for (j=0; j<n; j++) { 380 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 381 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 382 if (idxn[j] >= bscstart && idxn[j] < bscend) { 383 col = idxn[j] - bscstart; 384 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 385 } else { 386 if (!baij->colmap) { 387 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 388 } 389 #if defined (PETSC_USE_CTABLE) 390 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 391 data --; 392 #else 393 data = baij->colmap[idxn[j]/bs]-1; 394 #endif 395 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 396 else { 397 col = data + idxn[j]%bs; 398 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 399 } 400 } 401 } 402 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatNorm_MPISBAIJ" 409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 410 { 411 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscReal sum[2],*lnorm2; 414 415 PetscFunctionBegin; 416 if (baij->size == 1) { 417 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 418 } else { 419 if (type == NORM_FROBENIUS) { 420 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 421 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 422 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 423 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 424 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 425 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 426 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 427 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 428 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 429 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 430 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 431 PetscReal *rsum,*rsum2,vabs; 432 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 433 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 434 MatScalar *v; 435 436 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 437 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 438 /* Amat */ 439 v = amat->a; jj = amat->j; 440 for (brow=0; brow<mbs; brow++) { 441 grow = bs*(rstart + brow); 442 nz = amat->i[brow+1] - amat->i[brow]; 443 for (bcol=0; bcol<nz; bcol++) { 444 gcol = bs*(rstart + *jj); jj++; 445 for (col=0; col<bs; col++) { 446 for (row=0; row<bs; row++) { 447 vabs = PetscAbsScalar(*v); v++; 448 rsum[gcol+col] += vabs; 449 /* non-diagonal block */ 450 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 451 } 452 } 453 } 454 } 455 /* Bmat */ 456 v = bmat->a; jj = bmat->j; 457 for (brow=0; brow<mbs; brow++) { 458 grow = bs*(rstart + brow); 459 nz = bmat->i[brow+1] - bmat->i[brow]; 460 for (bcol=0; bcol<nz; bcol++) { 461 gcol = bs*garray[*jj]; jj++; 462 for (col=0; col<bs; col++) { 463 for (row=0; row<bs; row++) { 464 vabs = PetscAbsScalar(*v); v++; 465 rsum[gcol+col] += vabs; 466 rsum[grow+row] += vabs; 467 } 468 } 469 } 470 } 471 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 472 *norm = 0.0; 473 for (col=0; col<mat->cmap->N; col++) { 474 if (rsum2[col] > *norm) *norm = rsum2[col]; 475 } 476 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 477 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 485 { 486 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 487 PetscErrorCode ierr; 488 PetscInt nstash,reallocs; 489 InsertMode addv; 490 491 PetscFunctionBegin; 492 if (baij->donotstash || mat->nooffprocentries) { 493 PetscFunctionReturn(0); 494 } 495 496 /* make sure all processors are either in INSERTMODE or ADDMODE */ 497 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 498 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 499 mat->insertmode = addv; /* in case this processor had no cache */ 500 501 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 502 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 503 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 504 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 513 { 514 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 515 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 516 PetscErrorCode ierr; 517 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 518 PetscInt *row,*col; 519 PetscBool other_disassembled; 520 PetscMPIInt n; 521 PetscBool r1,r2,r3; 522 MatScalar *val; 523 InsertMode addv = mat->insertmode; 524 525 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 526 PetscFunctionBegin; 527 if (!baij->donotstash && !mat->nooffprocentries) { 528 while (1) { 529 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 530 if (!flg) break; 531 532 for (i=0; i<n;) { 533 /* Now identify the consecutive vals belonging to the same row */ 534 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 535 if (j < n) ncols = j-i; 536 else ncols = n-i; 537 /* Now assemble all these values with a single function call */ 538 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 539 i = j; 540 } 541 } 542 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 543 /* Now process the block-stash. Since the values are stashed column-oriented, 544 set the roworiented flag to column oriented, and after MatSetValues() 545 restore the original flags */ 546 r1 = baij->roworiented; 547 r2 = a->roworiented; 548 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 549 baij->roworiented = PETSC_FALSE; 550 a->roworiented = PETSC_FALSE; 551 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 552 while (1) { 553 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 554 if (!flg) break; 555 556 for (i=0; i<n;) { 557 /* Now identify the consecutive vals belonging to the same row */ 558 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 559 if (j < n) ncols = j-i; 560 else ncols = n-i; 561 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 562 i = j; 563 } 564 } 565 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 566 baij->roworiented = r1; 567 a->roworiented = r2; 568 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 569 } 570 571 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 572 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 573 574 /* determine if any processor has disassembled, if so we must 575 also disassemble ourselfs, in order that we may reassemble. */ 576 /* 577 if nonzero structure of submatrix B cannot change then we know that 578 no processor disassembled thus we can skip this stuff 579 */ 580 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 581 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 582 if (mat->was_assembled && !other_disassembled) { 583 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 584 } 585 } 586 587 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 588 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 589 } 590 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 591 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 592 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 593 594 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 595 baij->rowvalues = 0; 596 597 PetscFunctionReturn(0); 598 } 599 600 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 603 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 604 { 605 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 606 PetscErrorCode ierr; 607 PetscInt bs = mat->rmap->bs; 608 PetscMPIInt size = baij->size,rank = baij->rank; 609 PetscBool iascii,isdraw; 610 PetscViewer sviewer; 611 PetscViewerFormat format; 612 613 PetscFunctionBegin; 614 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 615 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 616 if (iascii) { 617 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 618 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 619 MatInfo info; 620 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 621 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 622 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 623 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 624 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 628 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 630 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 631 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } else if (format == PETSC_VIEWER_ASCII_INFO) { 634 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 635 PetscFunctionReturn(0); 636 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 637 PetscFunctionReturn(0); 638 } 639 } 640 641 if (isdraw) { 642 PetscDraw draw; 643 PetscBool isnull; 644 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 645 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 646 } 647 648 if (size == 1) { 649 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 650 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 651 } else { 652 /* assemble the entire matrix onto first processor. */ 653 Mat A; 654 Mat_SeqSBAIJ *Aloc; 655 Mat_SeqBAIJ *Bloc; 656 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 657 MatScalar *a; 658 659 /* Should this be the same type as mat? */ 660 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 661 if (!rank) { 662 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 663 } else { 664 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 665 } 666 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 667 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 668 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 669 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 670 671 /* copy over the A part */ 672 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 674 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 675 676 for (i=0; i<mbs; i++) { 677 rvals[0] = bs*(baij->rstartbs + i); 678 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 679 for (j=ai[i]; j<ai[i+1]; j++) { 680 col = (baij->cstartbs+aj[j])*bs; 681 for (k=0; k<bs; k++) { 682 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 683 col++; a += bs; 684 } 685 } 686 } 687 /* copy over the B part */ 688 Bloc = (Mat_SeqBAIJ*)baij->B->data; 689 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 690 for (i=0; i<mbs; i++) { 691 692 rvals[0] = bs*(baij->rstartbs + i); 693 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 694 for (j=ai[i]; j<ai[i+1]; j++) { 695 col = baij->garray[aj[j]]*bs; 696 for (k=0; k<bs; k++) { 697 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 698 col++; a += bs; 699 } 700 } 701 } 702 ierr = PetscFree(rvals);CHKERRQ(ierr); 703 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 /* 706 Everyone has to call to draw the matrix since the graphics waits are 707 synchronized across all processors that share the PetscDraw object 708 */ 709 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 713 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 714 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 715 } 716 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 717 ierr = MatDestroy(&A);CHKERRQ(ierr); 718 } 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNCT__ 723 #define __FUNCT__ "MatView_MPISBAIJ" 724 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 725 { 726 PetscErrorCode ierr; 727 PetscBool iascii,isdraw,issocket,isbinary; 728 729 PetscFunctionBegin; 730 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 731 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 732 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 733 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 734 if (iascii || isdraw || issocket || isbinary) { 735 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPISBAIJ" 742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 743 { 744 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 753 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 757 #else 758 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 759 #endif 760 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 761 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 762 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 763 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 764 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 765 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 767 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 768 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 769 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 770 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 771 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 772 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 773 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 774 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 775 #if defined(PETSC_USE_REAL_MAT_SINGLE) 776 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 777 #endif 778 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 779 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 780 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 781 ierr = PetscFree(mat->data);CHKERRQ(ierr); 782 783 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 784 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 785 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 786 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 794 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 795 { 796 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 797 PetscErrorCode ierr; 798 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 799 PetscScalar *x,*from; 800 801 PetscFunctionBegin; 802 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 803 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 804 805 /* diagonal part */ 806 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 807 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 808 809 /* subdiagonal part */ 810 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 811 812 /* copy x into the vec slvec0 */ 813 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 814 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 815 816 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 817 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 818 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 819 820 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 821 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 /* supperdiagonal part */ 823 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatMult_MPISBAIJ" 829 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 830 { 831 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 832 PetscErrorCode ierr; 833 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 834 PetscScalar *x,*from; 835 836 PetscFunctionBegin; 837 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 838 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 839 840 /* diagonal part */ 841 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 842 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 843 844 /* subdiagonal part */ 845 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 846 847 /* copy x into the vec slvec0 */ 848 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 849 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 850 851 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 852 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 853 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 854 855 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 /* supperdiagonal part */ 858 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 864 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 865 { 866 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 867 PetscErrorCode ierr; 868 PetscInt nt; 869 870 PetscFunctionBegin; 871 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 872 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 873 874 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 875 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 876 877 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 /* do diagonal part */ 879 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 880 /* do supperdiagonal part */ 881 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 883 /* do subdiagonal part */ 884 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 885 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 886 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 887 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 893 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 894 { 895 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 896 PetscErrorCode ierr; 897 PetscInt mbs=a->mbs,bs=A->rmap->bs; 898 PetscScalar *x,*from,zero=0.0; 899 900 PetscFunctionBegin; 901 /* 902 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 903 PetscSynchronizedFlush(((PetscObject)A)->comm); 904 */ 905 /* diagonal part */ 906 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 907 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 908 909 /* subdiagonal part */ 910 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 911 912 /* copy x into the vec slvec0 */ 913 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 914 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 915 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 916 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 917 918 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 920 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 922 /* supperdiagonal part */ 923 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 924 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 930 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 931 { 932 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 933 PetscErrorCode ierr; 934 935 PetscFunctionBegin; 936 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 937 /* do diagonal part */ 938 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 939 /* do supperdiagonal part */ 940 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 942 943 /* do subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 945 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 946 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 947 948 PetscFunctionReturn(0); 949 } 950 951 /* 952 This only works correctly for square matrices where the subblock A->A is the 953 diagonal block 954 */ 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 957 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 958 { 959 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 960 PetscErrorCode ierr; 961 962 PetscFunctionBegin; 963 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 964 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatScale_MPISBAIJ" 970 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 971 { 972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 977 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatGetRow_MPISBAIJ" 983 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 984 { 985 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 986 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 987 PetscErrorCode ierr; 988 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 989 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 990 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 991 992 PetscFunctionBegin; 993 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 994 mat->getrowactive = PETSC_TRUE; 995 996 if (!mat->rowvalues && (idx || v)) { 997 /* 998 allocate enough space to hold information from the longest row. 999 */ 1000 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1001 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1002 PetscInt max = 1,mbs = mat->mbs,tmp; 1003 for (i=0; i<mbs; i++) { 1004 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1005 if (max < tmp) { max = tmp; } 1006 } 1007 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1008 } 1009 1010 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1011 lrow = row - brstart; /* local row index */ 1012 1013 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1014 if (!v) {pvA = 0; pvB = 0;} 1015 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1016 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1017 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1018 nztot = nzA + nzB; 1019 1020 cmap = mat->garray; 1021 if (v || idx) { 1022 if (nztot) { 1023 /* Sort by increasing column numbers, assuming A and B already sorted */ 1024 PetscInt imark = -1; 1025 if (v) { 1026 *v = v_p = mat->rowvalues; 1027 for (i=0; i<nzB; i++) { 1028 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1029 else break; 1030 } 1031 imark = i; 1032 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1033 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1034 } 1035 if (idx) { 1036 *idx = idx_p = mat->rowindices; 1037 if (imark > -1) { 1038 for (i=0; i<imark; i++) { 1039 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1040 } 1041 } else { 1042 for (i=0; i<nzB; i++) { 1043 if (cmap[cworkB[i]/bs] < cstart) 1044 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1045 else break; 1046 } 1047 imark = i; 1048 } 1049 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1050 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1051 } 1052 } else { 1053 if (idx) *idx = 0; 1054 if (v) *v = 0; 1055 } 1056 } 1057 *nz = nztot; 1058 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1059 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1065 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1066 { 1067 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1068 1069 PetscFunctionBegin; 1070 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1071 baij->getrowactive = PETSC_FALSE; 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1077 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1078 { 1079 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1080 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1081 1082 PetscFunctionBegin; 1083 aA->getrow_utriangular = PETSC_TRUE; 1084 PetscFunctionReturn(0); 1085 } 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1088 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1089 { 1090 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1091 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1092 1093 PetscFunctionBegin; 1094 aA->getrow_utriangular = PETSC_FALSE; 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1100 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1101 { 1102 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1103 PetscErrorCode ierr; 1104 1105 PetscFunctionBegin; 1106 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1107 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1113 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1114 { 1115 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1116 PetscErrorCode ierr; 1117 1118 PetscFunctionBegin; 1119 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1120 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1126 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1127 { 1128 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1133 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1139 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1140 { 1141 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1142 Mat A = a->A,B = a->B; 1143 PetscErrorCode ierr; 1144 PetscReal isend[5],irecv[5]; 1145 1146 PetscFunctionBegin; 1147 info->block_size = (PetscReal)matin->rmap->bs; 1148 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1149 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1150 isend[3] = info->memory; isend[4] = info->mallocs; 1151 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1153 isend[3] += info->memory; isend[4] += info->mallocs; 1154 if (flag == MAT_LOCAL) { 1155 info->nz_used = isend[0]; 1156 info->nz_allocated = isend[1]; 1157 info->nz_unneeded = isend[2]; 1158 info->memory = isend[3]; 1159 info->mallocs = isend[4]; 1160 } else if (flag == MAT_GLOBAL_MAX) { 1161 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1162 info->nz_used = irecv[0]; 1163 info->nz_allocated = irecv[1]; 1164 info->nz_unneeded = irecv[2]; 1165 info->memory = irecv[3]; 1166 info->mallocs = irecv[4]; 1167 } else if (flag == MAT_GLOBAL_SUM) { 1168 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1169 info->nz_used = irecv[0]; 1170 info->nz_allocated = irecv[1]; 1171 info->nz_unneeded = irecv[2]; 1172 info->memory = irecv[3]; 1173 info->mallocs = irecv[4]; 1174 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1175 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1176 info->fill_ratio_needed = 0; 1177 info->factor_mallocs = 0; 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1183 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1184 { 1185 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1186 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 switch (op) { 1191 case MAT_NEW_NONZERO_LOCATIONS: 1192 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1193 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1194 case MAT_KEEP_NONZERO_PATTERN: 1195 case MAT_NEW_NONZERO_LOCATION_ERR: 1196 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1197 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1198 break; 1199 case MAT_ROW_ORIENTED: 1200 a->roworiented = flg; 1201 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1203 break; 1204 case MAT_NEW_DIAGONALS: 1205 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1206 break; 1207 case MAT_IGNORE_OFF_PROC_ENTRIES: 1208 a->donotstash = flg; 1209 break; 1210 case MAT_USE_HASH_TABLE: 1211 a->ht_flag = flg; 1212 break; 1213 case MAT_HERMITIAN: 1214 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1215 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1216 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1217 break; 1218 case MAT_SPD: 1219 A->spd_set = PETSC_TRUE; 1220 A->spd = flg; 1221 if (flg) { 1222 A->symmetric = PETSC_TRUE; 1223 A->structurally_symmetric = PETSC_TRUE; 1224 A->symmetric_set = PETSC_TRUE; 1225 A->structurally_symmetric_set = PETSC_TRUE; 1226 } 1227 break; 1228 case MAT_SYMMETRIC: 1229 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1230 break; 1231 case MAT_STRUCTURALLY_SYMMETRIC: 1232 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1233 break; 1234 case MAT_SYMMETRY_ETERNAL: 1235 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1236 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1237 break; 1238 case MAT_IGNORE_LOWER_TRIANGULAR: 1239 aA->ignore_ltriangular = flg; 1240 break; 1241 case MAT_ERROR_LOWER_TRIANGULAR: 1242 aA->ignore_ltriangular = flg; 1243 break; 1244 case MAT_GETROW_UPPERTRIANGULAR: 1245 aA->getrow_utriangular = flg; 1246 break; 1247 default: 1248 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1249 } 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1255 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1256 { 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 if (MAT_INITIAL_MATRIX || *B != A) { 1261 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1262 } 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1268 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1269 { 1270 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1271 Mat a=baij->A, b=baij->B; 1272 PetscErrorCode ierr; 1273 PetscInt nv,m,n; 1274 PetscBool flg; 1275 1276 PetscFunctionBegin; 1277 if (ll != rr) { 1278 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1279 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1280 } 1281 if (!ll) PetscFunctionReturn(0); 1282 1283 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1284 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1285 1286 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1287 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1288 1289 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1290 1291 /* left diagonalscale the off-diagonal part */ 1292 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1293 1294 /* scale the diagonal part */ 1295 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1296 1297 /* right diagonalscale the off-diagonal part */ 1298 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1299 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1305 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1306 { 1307 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 1310 PetscFunctionBegin; 1311 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1312 PetscFunctionReturn(0); 1313 } 1314 1315 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatEqual_MPISBAIJ" 1319 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1320 { 1321 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1322 Mat a,b,c,d; 1323 PetscBool flg; 1324 PetscErrorCode ierr; 1325 1326 PetscFunctionBegin; 1327 a = matA->A; b = matA->B; 1328 c = matB->A; d = matB->B; 1329 1330 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1331 if (flg) { 1332 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1333 } 1334 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatCopy_MPISBAIJ" 1340 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1341 { 1342 PetscErrorCode ierr; 1343 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1344 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1345 1346 PetscFunctionBegin; 1347 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1348 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1349 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1350 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1351 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1352 } else { 1353 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1354 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1361 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1362 { 1363 PetscErrorCode ierr; 1364 1365 PetscFunctionBegin; 1366 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1372 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1373 { 1374 PetscErrorCode ierr; 1375 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1376 PetscBLASInt bnz,one=1; 1377 Mat_SeqSBAIJ *xa,*ya; 1378 Mat_SeqBAIJ *xb,*yb; 1379 1380 PetscFunctionBegin; 1381 if (str == SAME_NONZERO_PATTERN) { 1382 PetscScalar alpha = a; 1383 xa = (Mat_SeqSBAIJ *)xx->A->data; 1384 ya = (Mat_SeqSBAIJ *)yy->A->data; 1385 bnz = PetscBLASIntCast(xa->nz); 1386 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1387 xb = (Mat_SeqBAIJ *)xx->B->data; 1388 yb = (Mat_SeqBAIJ *)yy->B->data; 1389 bnz = PetscBLASIntCast(xb->nz); 1390 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1391 } else { 1392 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1393 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1394 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1401 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1402 { 1403 PetscErrorCode ierr; 1404 PetscInt i; 1405 PetscBool flg; 1406 1407 PetscFunctionBegin; 1408 for (i=0; i<n; i++) { 1409 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1410 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1411 } 1412 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 1417 /* -------------------------------------------------------------------*/ 1418 static struct _MatOps MatOps_Values = { 1419 MatSetValues_MPISBAIJ, 1420 MatGetRow_MPISBAIJ, 1421 MatRestoreRow_MPISBAIJ, 1422 MatMult_MPISBAIJ, 1423 /* 4*/ MatMultAdd_MPISBAIJ, 1424 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1425 MatMultAdd_MPISBAIJ, 1426 0, 1427 0, 1428 0, 1429 /* 10*/ 0, 1430 0, 1431 0, 1432 MatSOR_MPISBAIJ, 1433 MatTranspose_MPISBAIJ, 1434 /* 15*/ MatGetInfo_MPISBAIJ, 1435 MatEqual_MPISBAIJ, 1436 MatGetDiagonal_MPISBAIJ, 1437 MatDiagonalScale_MPISBAIJ, 1438 MatNorm_MPISBAIJ, 1439 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1440 MatAssemblyEnd_MPISBAIJ, 1441 MatSetOption_MPISBAIJ, 1442 MatZeroEntries_MPISBAIJ, 1443 /* 24*/ 0, 1444 0, 1445 0, 1446 0, 1447 0, 1448 /* 29*/ MatSetUp_MPISBAIJ, 1449 0, 1450 0, 1451 0, 1452 0, 1453 /* 34*/ MatDuplicate_MPISBAIJ, 1454 0, 1455 0, 1456 0, 1457 0, 1458 /* 39*/ MatAXPY_MPISBAIJ, 1459 MatGetSubMatrices_MPISBAIJ, 1460 MatIncreaseOverlap_MPISBAIJ, 1461 MatGetValues_MPISBAIJ, 1462 MatCopy_MPISBAIJ, 1463 /* 44*/ 0, 1464 MatScale_MPISBAIJ, 1465 0, 1466 0, 1467 0, 1468 /* 49*/ 0, 1469 0, 1470 0, 1471 0, 1472 0, 1473 /* 54*/ 0, 1474 0, 1475 MatSetUnfactored_MPISBAIJ, 1476 0, 1477 MatSetValuesBlocked_MPISBAIJ, 1478 /* 59*/ 0, 1479 0, 1480 0, 1481 0, 1482 0, 1483 /* 64*/ 0, 1484 0, 1485 0, 1486 0, 1487 0, 1488 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1489 0, 1490 0, 1491 0, 1492 0, 1493 /* 74*/ 0, 1494 0, 1495 0, 1496 0, 1497 0, 1498 /* 79*/ 0, 1499 0, 1500 0, 1501 0, 1502 MatLoad_MPISBAIJ, 1503 /* 84*/ 0, 1504 0, 1505 0, 1506 0, 1507 0, 1508 /* 89*/ 0, 1509 0, 1510 0, 1511 0, 1512 0, 1513 /* 94*/ 0, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /* 99*/ 0, 1519 0, 1520 0, 1521 0, 1522 0, 1523 /*104*/ 0, 1524 MatRealPart_MPISBAIJ, 1525 MatImaginaryPart_MPISBAIJ, 1526 MatGetRowUpperTriangular_MPISBAIJ, 1527 MatRestoreRowUpperTriangular_MPISBAIJ, 1528 /*109*/ 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /*114*/ 0, 1534 0, 1535 0, 1536 0, 1537 0, 1538 /*119*/ 0, 1539 0, 1540 0, 1541 0 1542 }; 1543 1544 1545 EXTERN_C_BEGIN 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1548 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1549 { 1550 PetscFunctionBegin; 1551 *a = ((Mat_MPISBAIJ *)A->data)->A; 1552 PetscFunctionReturn(0); 1553 } 1554 EXTERN_C_END 1555 1556 EXTERN_C_BEGIN 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1559 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1560 { 1561 Mat_MPISBAIJ *b; 1562 PetscErrorCode ierr; 1563 PetscInt i,mbs,Mbs; 1564 1565 PetscFunctionBegin; 1566 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1567 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1568 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1569 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1570 1571 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1572 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1573 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1574 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1575 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1576 1577 if (d_nnz) { 1578 for (i=0; i<B->rmap->n/bs; i++) { 1579 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1580 } 1581 } 1582 if (o_nnz) { 1583 for (i=0; i<B->rmap->n/bs; i++) { 1584 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1585 } 1586 } 1587 1588 b = (Mat_MPISBAIJ*)B->data; 1589 mbs = B->rmap->n/bs; 1590 Mbs = B->rmap->N/bs; 1591 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1592 1593 B->rmap->bs = bs; 1594 b->bs2 = bs*bs; 1595 b->mbs = mbs; 1596 b->nbs = mbs; 1597 b->Mbs = Mbs; 1598 b->Nbs = Mbs; 1599 1600 for (i=0; i<=b->size; i++) { 1601 b->rangebs[i] = B->rmap->range[i]/bs; 1602 } 1603 b->rstartbs = B->rmap->rstart/bs; 1604 b->rendbs = B->rmap->rend/bs; 1605 1606 b->cstartbs = B->cmap->rstart/bs; 1607 b->cendbs = B->cmap->rend/bs; 1608 1609 if (!B->preallocated) { 1610 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1611 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1612 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1614 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1615 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1616 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1617 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1618 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1619 } 1620 1621 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1622 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1623 B->preallocated = PETSC_TRUE; 1624 PetscFunctionReturn(0); 1625 } 1626 EXTERN_C_END 1627 1628 EXTERN_C_BEGIN 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1631 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1632 { 1633 PetscInt m,rstart,cstart,cend; 1634 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1635 const PetscInt *JJ=0; 1636 PetscScalar *values=0; 1637 PetscErrorCode ierr; 1638 1639 PetscFunctionBegin; 1640 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1641 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1642 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1643 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1644 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1645 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1646 m = B->rmap->n/bs; 1647 rstart = B->rmap->rstart/bs; 1648 cstart = B->cmap->rstart/bs; 1649 cend = B->cmap->rend/bs; 1650 1651 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1652 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) { 1654 nz = ii[i+1] - ii[i]; 1655 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1656 nz_max = PetscMax(nz_max,nz); 1657 JJ = jj + ii[i]; 1658 for (j=0; j<nz; j++) { 1659 if (*JJ >= cstart) break; 1660 JJ++; 1661 } 1662 d = 0; 1663 for (; j<nz; j++) { 1664 if (*JJ++ >= cend) break; 1665 d++; 1666 } 1667 d_nnz[i] = d; 1668 o_nnz[i] = nz - d; 1669 } 1670 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1671 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1672 1673 values = (PetscScalar*)V; 1674 if (!values) { 1675 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1676 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1677 } 1678 for (i=0; i<m; i++) { 1679 PetscInt row = i + rstart; 1680 PetscInt ncols = ii[i+1] - ii[i]; 1681 const PetscInt *icols = jj + ii[i]; 1682 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1683 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1684 } 1685 1686 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1687 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 EXTERN_C_END 1693 1694 EXTERN_C_BEGIN 1695 #if defined(PETSC_HAVE_MUMPS) 1696 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1697 #endif 1698 #if defined(PETSC_HAVE_PASTIX) 1699 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1700 #endif 1701 EXTERN_C_END 1702 1703 /*MC 1704 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1705 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1706 the matrix is stored. 1707 1708 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1709 can call MatSetOption(Mat, MAT_HERMITIAN); 1710 1711 Options Database Keys: 1712 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1713 1714 Level: beginner 1715 1716 .seealso: MatCreateMPISBAIJ 1717 M*/ 1718 1719 EXTERN_C_BEGIN 1720 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1721 EXTERN_C_END 1722 1723 EXTERN_C_BEGIN 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatCreate_MPISBAIJ" 1726 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1727 { 1728 Mat_MPISBAIJ *b; 1729 PetscErrorCode ierr; 1730 PetscBool flg; 1731 1732 PetscFunctionBegin; 1733 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1734 B->data = (void*)b; 1735 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1736 1737 B->ops->destroy = MatDestroy_MPISBAIJ; 1738 B->ops->view = MatView_MPISBAIJ; 1739 B->assembled = PETSC_FALSE; 1740 1741 B->insertmode = NOT_SET_VALUES; 1742 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1743 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1744 1745 /* build local table of row and column ownerships */ 1746 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1747 1748 /* build cache for off array entries formed */ 1749 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1750 b->donotstash = PETSC_FALSE; 1751 b->colmap = PETSC_NULL; 1752 b->garray = PETSC_NULL; 1753 b->roworiented = PETSC_TRUE; 1754 1755 /* stuff used in block assembly */ 1756 b->barray = 0; 1757 1758 /* stuff used for matrix vector multiply */ 1759 b->lvec = 0; 1760 b->Mvctx = 0; 1761 b->slvec0 = 0; 1762 b->slvec0b = 0; 1763 b->slvec1 = 0; 1764 b->slvec1a = 0; 1765 b->slvec1b = 0; 1766 b->sMvctx = 0; 1767 1768 /* stuff for MatGetRow() */ 1769 b->rowindices = 0; 1770 b->rowvalues = 0; 1771 b->getrowactive = PETSC_FALSE; 1772 1773 /* hash table stuff */ 1774 b->ht = 0; 1775 b->hd = 0; 1776 b->ht_size = 0; 1777 b->ht_flag = PETSC_FALSE; 1778 b->ht_fact = 0; 1779 b->ht_total_ct = 0; 1780 b->ht_insert_ct = 0; 1781 1782 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1783 b->ijonly = PETSC_FALSE; 1784 1785 b->in_loc = 0; 1786 b->v_loc = 0; 1787 b->n_loc = 0; 1788 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1789 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1790 if (flg) { 1791 PetscReal fact = 1.39; 1792 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1793 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1794 if (fact <= 1.0) fact = 1.39; 1795 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1796 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1797 } 1798 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1799 1800 #if defined(PETSC_HAVE_PASTIX) 1801 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1802 "MatGetFactor_mpisbaij_pastix", 1803 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1804 #endif 1805 #if defined(PETSC_HAVE_MUMPS) 1806 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1807 "MatGetFactor_sbaij_mumps", 1808 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1809 #endif 1810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1811 "MatStoreValues_MPISBAIJ", 1812 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1813 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1814 "MatRetrieveValues_MPISBAIJ", 1815 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1816 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1817 "MatGetDiagonalBlock_MPISBAIJ", 1818 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1820 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1821 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1823 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1824 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C", 1826 "MatConvert_MPISBAIJ_MPISBSTRM", 1827 MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1828 1829 B->symmetric = PETSC_TRUE; 1830 B->structurally_symmetric = PETSC_TRUE; 1831 B->symmetric_set = PETSC_TRUE; 1832 B->structurally_symmetric_set = PETSC_TRUE; 1833 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 EXTERN_C_END 1837 1838 /*MC 1839 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1840 1841 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1842 and MATMPISBAIJ otherwise. 1843 1844 Options Database Keys: 1845 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1846 1847 Level: beginner 1848 1849 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1850 M*/ 1851 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1854 /*@C 1855 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1856 the user should preallocate the matrix storage by setting the parameters 1857 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1858 performance can be increased by more than a factor of 50. 1859 1860 Collective on Mat 1861 1862 Input Parameters: 1863 + A - the matrix 1864 . bs - size of blockk 1865 . d_nz - number of block nonzeros per block row in diagonal portion of local 1866 submatrix (same for all local rows) 1867 . d_nnz - array containing the number of block nonzeros in the various block rows 1868 in the upper triangular and diagonal part of the in diagonal portion of the local 1869 (possibly different for each block row) or PETSC_NULL. If you plan to factor the matrix you must leave room 1870 for the diagonal entry and set a value even if it is zero. 1871 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1872 submatrix (same for all local rows). 1873 - o_nnz - array containing the number of nonzeros in the various block rows of the 1874 off-diagonal portion of the local submatrix that is right of the diagonal 1875 (possibly different for each block row) or PETSC_NULL. 1876 1877 1878 Options Database Keys: 1879 . -mat_no_unroll - uses code that does not unroll the loops in the 1880 block calculations (much slower) 1881 . -mat_block_size - size of the blocks to use 1882 1883 Notes: 1884 1885 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1886 than it must be used on all processors that share the object for that argument. 1887 1888 If the *_nnz parameter is given then the *_nz parameter is ignored 1889 1890 Storage Information: 1891 For a square global matrix we define each processor's diagonal portion 1892 to be its local rows and the corresponding columns (a square submatrix); 1893 each processor's off-diagonal portion encompasses the remainder of the 1894 local matrix (a rectangular submatrix). 1895 1896 The user can specify preallocated storage for the diagonal part of 1897 the local submatrix with either d_nz or d_nnz (not both). Set 1898 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1899 memory allocation. Likewise, specify preallocated storage for the 1900 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1901 1902 You can call MatGetInfo() to get information on how effective the preallocation was; 1903 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1904 You can also run with the option -info and look for messages with the string 1905 malloc in them to see if additional memory allocation was needed. 1906 1907 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1908 the figure below we depict these three local rows and all columns (0-11). 1909 1910 .vb 1911 0 1 2 3 4 5 6 7 8 9 10 11 1912 ------------------- 1913 row 3 | . . . d d d o o o o o o 1914 row 4 | . . . d d d o o o o o o 1915 row 5 | . . . d d d o o o o o o 1916 ------------------- 1917 .ve 1918 1919 Thus, any entries in the d locations are stored in the d (diagonal) 1920 submatrix, and any entries in the o locations are stored in the 1921 o (off-diagonal) submatrix. Note that the d matrix is stored in 1922 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1923 1924 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1925 plus the diagonal part of the d matrix, 1926 and o_nz should indicate the number of block nonzeros per row in the o matrix 1927 1928 In general, for PDE problems in which most nonzeros are near the diagonal, 1929 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1930 or you will get TERRIBLE performance; see the users' manual chapter on 1931 matrices. 1932 1933 Level: intermediate 1934 1935 .keywords: matrix, block, aij, compressed row, sparse, parallel 1936 1937 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 1938 @*/ 1939 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1940 { 1941 PetscErrorCode ierr; 1942 1943 PetscFunctionBegin; 1944 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1945 PetscValidType(B,1); 1946 PetscValidLogicalCollectiveInt(B,bs,2); 1947 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatCreateSBAIJ" 1953 /*@C 1954 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1955 (block compressed row). For good matrix assembly performance 1956 the user should preallocate the matrix storage by setting the parameters 1957 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1958 performance can be increased by more than a factor of 50. 1959 1960 Collective on MPI_Comm 1961 1962 Input Parameters: 1963 + comm - MPI communicator 1964 . bs - size of blockk 1965 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1966 This value should be the same as the local size used in creating the 1967 y vector for the matrix-vector product y = Ax. 1968 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1969 This value should be the same as the local size used in creating the 1970 x vector for the matrix-vector product y = Ax. 1971 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1972 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1973 . d_nz - number of block nonzeros per block row in diagonal portion of local 1974 submatrix (same for all local rows) 1975 . d_nnz - array containing the number of block nonzeros in the various block rows 1976 in the upper triangular portion of the in diagonal portion of the local 1977 (possibly different for each block block row) or PETSC_NULL. 1978 If you plan to factor the matrix you must leave room for the diagonal entry and 1979 set its value even if it is zero. 1980 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1981 submatrix (same for all local rows). 1982 - o_nnz - array containing the number of nonzeros in the various block rows of the 1983 off-diagonal portion of the local submatrix (possibly different for 1984 each block row) or PETSC_NULL. 1985 1986 Output Parameter: 1987 . A - the matrix 1988 1989 Options Database Keys: 1990 . -mat_no_unroll - uses code that does not unroll the loops in the 1991 block calculations (much slower) 1992 . -mat_block_size - size of the blocks to use 1993 . -mat_mpi - use the parallel matrix data structures even on one processor 1994 (defaults to using SeqBAIJ format on one processor) 1995 1996 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1997 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1998 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1999 2000 Notes: 2001 The number of rows and columns must be divisible by blocksize. 2002 This matrix type does not support complex Hermitian operation. 2003 2004 The user MUST specify either the local or global matrix dimensions 2005 (possibly both). 2006 2007 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2008 than it must be used on all processors that share the object for that argument. 2009 2010 If the *_nnz parameter is given then the *_nz parameter is ignored 2011 2012 Storage Information: 2013 For a square global matrix we define each processor's diagonal portion 2014 to be its local rows and the corresponding columns (a square submatrix); 2015 each processor's off-diagonal portion encompasses the remainder of the 2016 local matrix (a rectangular submatrix). 2017 2018 The user can specify preallocated storage for the diagonal part of 2019 the local submatrix with either d_nz or d_nnz (not both). Set 2020 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2021 memory allocation. Likewise, specify preallocated storage for the 2022 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2023 2024 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2025 the figure below we depict these three local rows and all columns (0-11). 2026 2027 .vb 2028 0 1 2 3 4 5 6 7 8 9 10 11 2029 ------------------- 2030 row 3 | . . . d d d o o o o o o 2031 row 4 | . . . d d d o o o o o o 2032 row 5 | . . . d d d o o o o o o 2033 ------------------- 2034 .ve 2035 2036 Thus, any entries in the d locations are stored in the d (diagonal) 2037 submatrix, and any entries in the o locations are stored in the 2038 o (off-diagonal) submatrix. Note that the d matrix is stored in 2039 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2040 2041 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2042 plus the diagonal part of the d matrix, 2043 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2044 In general, for PDE problems in which most nonzeros are near the diagonal, 2045 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2046 or you will get TERRIBLE performance; see the users' manual chapter on 2047 matrices. 2048 2049 Level: intermediate 2050 2051 .keywords: matrix, block, aij, compressed row, sparse, parallel 2052 2053 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2054 @*/ 2055 2056 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2057 { 2058 PetscErrorCode ierr; 2059 PetscMPIInt size; 2060 2061 PetscFunctionBegin; 2062 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2063 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2064 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2065 if (size > 1) { 2066 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2067 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2068 } else { 2069 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2070 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2078 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2079 { 2080 Mat mat; 2081 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2082 PetscErrorCode ierr; 2083 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2084 PetscScalar *array; 2085 2086 PetscFunctionBegin; 2087 *newmat = 0; 2088 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2089 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2090 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2091 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2092 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2093 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2094 2095 mat->factortype = matin->factortype; 2096 mat->preallocated = PETSC_TRUE; 2097 mat->assembled = PETSC_TRUE; 2098 mat->insertmode = NOT_SET_VALUES; 2099 2100 a = (Mat_MPISBAIJ*)mat->data; 2101 a->bs2 = oldmat->bs2; 2102 a->mbs = oldmat->mbs; 2103 a->nbs = oldmat->nbs; 2104 a->Mbs = oldmat->Mbs; 2105 a->Nbs = oldmat->Nbs; 2106 2107 2108 a->size = oldmat->size; 2109 a->rank = oldmat->rank; 2110 a->donotstash = oldmat->donotstash; 2111 a->roworiented = oldmat->roworiented; 2112 a->rowindices = 0; 2113 a->rowvalues = 0; 2114 a->getrowactive = PETSC_FALSE; 2115 a->barray = 0; 2116 a->rstartbs = oldmat->rstartbs; 2117 a->rendbs = oldmat->rendbs; 2118 a->cstartbs = oldmat->cstartbs; 2119 a->cendbs = oldmat->cendbs; 2120 2121 /* hash table stuff */ 2122 a->ht = 0; 2123 a->hd = 0; 2124 a->ht_size = 0; 2125 a->ht_flag = oldmat->ht_flag; 2126 a->ht_fact = oldmat->ht_fact; 2127 a->ht_total_ct = 0; 2128 a->ht_insert_ct = 0; 2129 2130 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2131 if (oldmat->colmap) { 2132 #if defined (PETSC_USE_CTABLE) 2133 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2134 #else 2135 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2136 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2137 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2138 #endif 2139 } else a->colmap = 0; 2140 2141 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2142 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2143 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2144 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2145 } else a->garray = 0; 2146 2147 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2148 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2149 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2150 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2151 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2152 2153 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2154 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2155 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2156 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2157 2158 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2159 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2160 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2161 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2162 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2163 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2164 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2165 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2166 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2167 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2168 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2169 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2170 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2171 2172 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2173 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2174 a->sMvctx = oldmat->sMvctx; 2175 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2176 2177 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2178 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2179 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2180 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2181 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2182 *newmat = mat; 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatLoad_MPISBAIJ" 2188 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2189 { 2190 PetscErrorCode ierr; 2191 PetscInt i,nz,j,rstart,rend; 2192 PetscScalar *vals,*buf; 2193 MPI_Comm comm = ((PetscObject)viewer)->comm; 2194 MPI_Status status; 2195 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2196 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2197 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2198 PetscInt bs=1,Mbs,mbs,extra_rows; 2199 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2200 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2201 int fd; 2202 2203 PetscFunctionBegin; 2204 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2205 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2206 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2207 2208 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2209 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2210 if (!rank) { 2211 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2212 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2213 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2214 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2215 } 2216 2217 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2218 2219 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2220 M = header[1]; N = header[2]; 2221 2222 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2223 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2224 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2225 2226 /* If global sizes are set, check if they are consistent with that given in the file */ 2227 if (sizesset) { 2228 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2229 } 2230 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2231 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2232 2233 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2234 2235 /* 2236 This code adds extra rows to make sure the number of rows is 2237 divisible by the blocksize 2238 */ 2239 Mbs = M/bs; 2240 extra_rows = bs - M + bs*(Mbs); 2241 if (extra_rows == bs) extra_rows = 0; 2242 else Mbs++; 2243 if (extra_rows &&!rank) { 2244 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2245 } 2246 2247 /* determine ownership of all rows */ 2248 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2249 mbs = Mbs/size + ((Mbs % size) > rank); 2250 m = mbs*bs; 2251 } else { /* User Set */ 2252 m = newmat->rmap->n; 2253 mbs = m/bs; 2254 } 2255 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2256 mmbs = PetscMPIIntCast(mbs); 2257 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2258 rowners[0] = 0; 2259 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2260 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2261 rstart = rowners[rank]; 2262 rend = rowners[rank+1]; 2263 2264 /* distribute row lengths to all processors */ 2265 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2266 if (!rank) { 2267 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2268 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2269 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2270 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2271 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2272 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2273 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2274 } else { 2275 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2276 } 2277 2278 if (!rank) { /* procs[0] */ 2279 /* calculate the number of nonzeros on each processor */ 2280 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2281 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2282 for (i=0; i<size; i++) { 2283 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2284 procsnz[i] += rowlengths[j]; 2285 } 2286 } 2287 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2288 2289 /* determine max buffer needed and allocate it */ 2290 maxnz = 0; 2291 for (i=0; i<size; i++) { 2292 maxnz = PetscMax(maxnz,procsnz[i]); 2293 } 2294 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2295 2296 /* read in my part of the matrix column indices */ 2297 nz = procsnz[0]; 2298 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2299 mycols = ibuf; 2300 if (size == 1) nz -= extra_rows; 2301 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2302 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2303 2304 /* read in every ones (except the last) and ship off */ 2305 for (i=1; i<size-1; i++) { 2306 nz = procsnz[i]; 2307 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2308 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2309 } 2310 /* read in the stuff for the last proc */ 2311 if (size != 1) { 2312 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2313 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2314 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2315 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2316 } 2317 ierr = PetscFree(cols);CHKERRQ(ierr); 2318 } else { /* procs[i], i>0 */ 2319 /* determine buffer space needed for message */ 2320 nz = 0; 2321 for (i=0; i<m; i++) { 2322 nz += locrowlens[i]; 2323 } 2324 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2325 mycols = ibuf; 2326 /* receive message of column indices*/ 2327 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2328 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2329 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2330 } 2331 2332 /* loop over local rows, determining number of off diagonal entries */ 2333 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2334 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2335 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2336 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2337 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2338 rowcount = 0; 2339 nzcount = 0; 2340 for (i=0; i<mbs; i++) { 2341 dcount = 0; 2342 odcount = 0; 2343 for (j=0; j<bs; j++) { 2344 kmax = locrowlens[rowcount]; 2345 for (k=0; k<kmax; k++) { 2346 tmp = mycols[nzcount++]/bs; /* block col. index */ 2347 if (!mask[tmp]) { 2348 mask[tmp] = 1; 2349 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2350 else masked1[dcount++] = tmp; /* entry in diag portion */ 2351 } 2352 } 2353 rowcount++; 2354 } 2355 2356 dlens[i] = dcount; /* d_nzz[i] */ 2357 odlens[i] = odcount; /* o_nzz[i] */ 2358 2359 /* zero out the mask elements we set */ 2360 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2361 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2362 } 2363 if (!sizesset) { 2364 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2365 } 2366 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2367 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2368 2369 if (!rank) { 2370 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2371 /* read in my part of the matrix numerical values */ 2372 nz = procsnz[0]; 2373 vals = buf; 2374 mycols = ibuf; 2375 if (size == 1) nz -= extra_rows; 2376 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2377 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2378 2379 /* insert into matrix */ 2380 jj = rstart*bs; 2381 for (i=0; i<m; i++) { 2382 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2383 mycols += locrowlens[i]; 2384 vals += locrowlens[i]; 2385 jj++; 2386 } 2387 2388 /* read in other processors (except the last one) and ship out */ 2389 for (i=1; i<size-1; i++) { 2390 nz = procsnz[i]; 2391 vals = buf; 2392 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2393 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2394 } 2395 /* the last proc */ 2396 if (size != 1) { 2397 nz = procsnz[i] - extra_rows; 2398 vals = buf; 2399 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2400 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2401 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2402 } 2403 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2404 2405 } else { 2406 /* receive numeric values */ 2407 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2408 2409 /* receive message of values*/ 2410 vals = buf; 2411 mycols = ibuf; 2412 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2413 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2414 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2415 2416 /* insert into matrix */ 2417 jj = rstart*bs; 2418 for (i=0; i<m; i++) { 2419 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 mycols += locrowlens[i]; 2421 vals += locrowlens[i]; 2422 jj++; 2423 } 2424 } 2425 2426 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2427 ierr = PetscFree(buf);CHKERRQ(ierr); 2428 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2429 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2430 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2431 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2432 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2433 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2439 /*XXXXX@ 2440 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2441 2442 Input Parameters: 2443 . mat - the matrix 2444 . fact - factor 2445 2446 Not Collective on Mat, each process can have a different hash factor 2447 2448 Level: advanced 2449 2450 Notes: 2451 This can also be set by the command line option: -mat_use_hash_table fact 2452 2453 .keywords: matrix, hashtable, factor, HT 2454 2455 .seealso: MatSetOption() 2456 @XXXXX*/ 2457 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2461 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2462 { 2463 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2464 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2465 PetscReal atmp; 2466 PetscReal *work,*svalues,*rvalues; 2467 PetscErrorCode ierr; 2468 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2469 PetscMPIInt rank,size; 2470 PetscInt *rowners_bs,dest,count,source; 2471 PetscScalar *va; 2472 MatScalar *ba; 2473 MPI_Status stat; 2474 2475 PetscFunctionBegin; 2476 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2477 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2478 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2479 2480 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2481 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2482 2483 bs = A->rmap->bs; 2484 mbs = a->mbs; 2485 Mbs = a->Mbs; 2486 ba = b->a; 2487 bi = b->i; 2488 bj = b->j; 2489 2490 /* find ownerships */ 2491 rowners_bs = A->rmap->range; 2492 2493 /* each proc creates an array to be distributed */ 2494 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2495 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2496 2497 /* row_max for B */ 2498 if (rank != size-1) { 2499 for (i=0; i<mbs; i++) { 2500 ncols = bi[1] - bi[0]; bi++; 2501 brow = bs*i; 2502 for (j=0; j<ncols; j++) { 2503 bcol = bs*(*bj); 2504 for (kcol=0; kcol<bs; kcol++) { 2505 col = bcol + kcol; /* local col index */ 2506 col += rowners_bs[rank+1]; /* global col index */ 2507 for (krow=0; krow<bs; krow++) { 2508 atmp = PetscAbsScalar(*ba); ba++; 2509 row = brow + krow; /* local row index */ 2510 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2511 if (work[col] < atmp) work[col] = atmp; 2512 } 2513 } 2514 bj++; 2515 } 2516 } 2517 2518 /* send values to its owners */ 2519 for (dest=rank+1; dest<size; dest++) { 2520 svalues = work + rowners_bs[dest]; 2521 count = rowners_bs[dest+1]-rowners_bs[dest]; 2522 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2523 } 2524 } 2525 2526 /* receive values */ 2527 if (rank) { 2528 rvalues = work; 2529 count = rowners_bs[rank+1]-rowners_bs[rank]; 2530 for (source=0; source<rank; source++) { 2531 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2532 /* process values */ 2533 for (i=0; i<count; i++) { 2534 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2535 } 2536 } 2537 } 2538 2539 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2540 ierr = PetscFree(work);CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatSOR_MPISBAIJ" 2546 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2547 { 2548 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2549 PetscErrorCode ierr; 2550 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2551 PetscScalar *x,*ptr,*from; 2552 Vec bb1; 2553 const PetscScalar *b; 2554 2555 PetscFunctionBegin; 2556 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2557 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2558 2559 if (flag == SOR_APPLY_UPPER) { 2560 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2565 if (flag & SOR_ZERO_INITIAL_GUESS) { 2566 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2567 its--; 2568 } 2569 2570 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2571 while (its--) { 2572 2573 /* lower triangular part: slvec0b = - B^T*xx */ 2574 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2575 2576 /* copy xx into slvec0a */ 2577 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2578 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2579 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2580 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2581 2582 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2583 2584 /* copy bb into slvec1a */ 2585 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2586 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2587 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2588 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2589 2590 /* set slvec1b = 0 */ 2591 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2592 2593 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2594 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2595 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2596 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2597 2598 /* upper triangular part: bb1 = bb1 - B*x */ 2599 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2600 2601 /* local diagonal sweep */ 2602 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2603 } 2604 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2605 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2607 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2608 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2609 } else if (flag & SOR_EISENSTAT) { 2610 Vec xx1; 2611 PetscBool hasop; 2612 const PetscScalar *diag; 2613 PetscScalar *sl,scale = (omega - 2.0)/omega; 2614 PetscInt i,n; 2615 2616 if (!mat->xx1) { 2617 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2618 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2619 } 2620 xx1 = mat->xx1; 2621 bb1 = mat->bb1; 2622 2623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2624 2625 if (!mat->diag) { 2626 /* this is wrong for same matrix with new nonzero values */ 2627 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2628 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2629 } 2630 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2631 2632 if (hasop) { 2633 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2634 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2635 } else { 2636 /* 2637 These two lines are replaced by code that may be a bit faster for a good compiler 2638 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2639 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2640 */ 2641 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2642 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2643 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2644 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2645 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2646 if (omega == 1.0) { 2647 for (i=0; i<n; i++) { 2648 sl[i] = b[i] - diag[i]*x[i]; 2649 } 2650 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2651 } else { 2652 for (i=0; i<n; i++) { 2653 sl[i] = b[i] + scale*diag[i]*x[i]; 2654 } 2655 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2656 } 2657 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2658 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2659 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2660 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2661 } 2662 2663 /* multiply off-diagonal portion of matrix */ 2664 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2665 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2666 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2667 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2668 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2669 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2670 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2671 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2672 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2673 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2674 2675 /* local sweep */ 2676 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2677 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2678 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2684 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2685 { 2686 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2687 PetscErrorCode ierr; 2688 Vec lvec1,bb1; 2689 2690 PetscFunctionBegin; 2691 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2692 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2693 2694 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2695 if (flag & SOR_ZERO_INITIAL_GUESS) { 2696 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2697 its--; 2698 } 2699 2700 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2701 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2702 while (its--) { 2703 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2704 2705 /* lower diagonal part: bb1 = bb - B^T*xx */ 2706 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2707 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2708 2709 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2710 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2711 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2712 2713 /* upper diagonal part: bb1 = bb1 - B*x */ 2714 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2715 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2716 2717 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2718 2719 /* diagonal sweep */ 2720 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2721 } 2722 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2723 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2724 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2730 /*@ 2731 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2732 CSR format the local rows. 2733 2734 Collective on MPI_Comm 2735 2736 Input Parameters: 2737 + comm - MPI communicator 2738 . bs - the block size, only a block size of 1 is supported 2739 . m - number of local rows (Cannot be PETSC_DECIDE) 2740 . n - This value should be the same as the local size used in creating the 2741 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2742 calculated if N is given) For square matrices n is almost always m. 2743 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2744 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2745 . i - row indices 2746 . j - column indices 2747 - a - matrix values 2748 2749 Output Parameter: 2750 . mat - the matrix 2751 2752 Level: intermediate 2753 2754 Notes: 2755 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2756 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2757 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2758 2759 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2760 2761 .keywords: matrix, aij, compressed row, sparse, parallel 2762 2763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2764 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2765 @*/ 2766 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2767 { 2768 PetscErrorCode ierr; 2769 2770 2771 PetscFunctionBegin; 2772 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2773 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2774 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2775 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2776 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2777 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2778 PetscFunctionReturn(0); 2779 } 2780 2781 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2784 /*@C 2785 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2786 (the default parallel PETSc format). 2787 2788 Collective on MPI_Comm 2789 2790 Input Parameters: 2791 + A - the matrix 2792 . bs - the block size 2793 . i - the indices into j for the start of each local row (starts with zero) 2794 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2795 - v - optional values in the matrix 2796 2797 Level: developer 2798 2799 .keywords: matrix, aij, compressed row, sparse, parallel 2800 2801 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2802 @*/ 2803 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2804 { 2805 PetscErrorCode ierr; 2806 2807 PetscFunctionBegin; 2808 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 2813