1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash) { 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; 200 else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; 229 else value = v[i+j*m]; 230 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 } 234 } else { /* off processor entry */ 235 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 236 if (!baij->donotstash) { 237 mat->assembled = PETSC_FALSE; 238 n_loc = 0; 239 for (j=0; j<n; j++) { 240 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 241 in_loc[n_loc] = in[j]; 242 if (roworiented) { 243 v_loc[n_loc] = v[i*n+j]; 244 } else { 245 v_loc[n_loc] = v[j*m+i]; 246 } 247 n_loc++; 248 } 249 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 250 } 251 } 252 } 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 259 { 260 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 261 const MatScalar *value; 262 MatScalar *barray=baij->barray; 263 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 264 PetscErrorCode ierr; 265 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 266 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 267 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 268 269 PetscFunctionBegin; 270 if (!barray) { 271 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 272 baij->barray = barray; 273 } 274 275 if (roworiented) { 276 stepval = (n-1)*bs; 277 } else { 278 stepval = (m-1)*bs; 279 } 280 for (i=0; i<m; i++) { 281 if (im[i] < 0) continue; 282 #if defined(PETSC_USE_DEBUG) 283 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 284 #endif 285 if (im[i] >= rstart && im[i] < rend) { 286 row = im[i] - rstart; 287 for (j=0; j<n; j++) { 288 if (im[i] > in[j]) { 289 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 290 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 291 } 292 /* If NumCol = 1 then a copy is not required */ 293 if ((roworiented) && (n == 1)) { 294 barray = (MatScalar*) v + i*bs2; 295 } else if ((!roworiented) && (m == 1)) { 296 barray = (MatScalar*) v + j*bs2; 297 } else { /* Here a copy is required */ 298 if (roworiented) { 299 value = v + i*(stepval+bs)*bs + j*bs; 300 } else { 301 value = v + j*(stepval+bs)*bs + i*bs; 302 } 303 for (ii=0; ii<bs; ii++,value+=stepval) { 304 for (jj=0; jj<bs; jj++) { 305 *barray++ = *value++; 306 } 307 } 308 barray -=bs2; 309 } 310 311 if (in[j] >= cstart && in[j] < cend) { 312 col = in[j] - cstart; 313 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 314 } 315 else if (in[j] < 0) continue; 316 #if defined(PETSC_USE_DEBUG) 317 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 318 #endif 319 else { 320 if (mat->was_assembled) { 321 if (!baij->colmap) { 322 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 323 } 324 325 #if defined(PETSC_USE_DEBUG) 326 #if defined (PETSC_USE_CTABLE) 327 { PetscInt data; 328 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 329 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 } 331 #else 332 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 333 #endif 334 #endif 335 #if defined (PETSC_USE_CTABLE) 336 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 337 col = (col - 1)/bs; 338 #else 339 col = (baij->colmap[in[j]] - 1)/bs; 340 #endif 341 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 342 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 343 col = in[j]; 344 } 345 } 346 else col = in[j]; 347 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 348 } 349 } 350 } else { 351 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 352 if (!baij->donotstash) { 353 if (roworiented) { 354 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 355 } else { 356 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 357 } 358 } 359 } 360 } 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatGetValues_MPISBAIJ" 366 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 367 { 368 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 369 PetscErrorCode ierr; 370 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 371 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 372 373 PetscFunctionBegin; 374 for (i=0; i<m; i++) { 375 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 376 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 377 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 378 row = idxm[i] - bsrstart; 379 for (j=0; j<n; j++) { 380 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 381 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 382 if (idxn[j] >= bscstart && idxn[j] < bscend) { 383 col = idxn[j] - bscstart; 384 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 385 } else { 386 if (!baij->colmap) { 387 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 388 } 389 #if defined (PETSC_USE_CTABLE) 390 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 391 data --; 392 #else 393 data = baij->colmap[idxn[j]/bs]-1; 394 #endif 395 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 396 else { 397 col = data + idxn[j]%bs; 398 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 399 } 400 } 401 } 402 } else { 403 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 404 } 405 } 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatNorm_MPISBAIJ" 411 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 412 { 413 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 414 PetscErrorCode ierr; 415 PetscReal sum[2],*lnorm2; 416 417 PetscFunctionBegin; 418 if (baij->size == 1) { 419 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 420 } else { 421 if (type == NORM_FROBENIUS) { 422 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 423 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 424 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 425 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 426 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 427 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 428 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 429 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 430 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 431 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 432 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 433 PetscReal *rsum,*rsum2,vabs; 434 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 435 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 436 MatScalar *v; 437 438 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 439 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 440 /* Amat */ 441 v = amat->a; jj = amat->j; 442 for (brow=0; brow<mbs; brow++) { 443 grow = bs*(rstart + brow); 444 nz = amat->i[brow+1] - amat->i[brow]; 445 for (bcol=0; bcol<nz; bcol++) { 446 gcol = bs*(rstart + *jj); jj++; 447 for (col=0; col<bs; col++) { 448 for (row=0; row<bs; row++) { 449 vabs = PetscAbsScalar(*v); v++; 450 rsum[gcol+col] += vabs; 451 /* non-diagonal block */ 452 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 453 } 454 } 455 } 456 } 457 /* Bmat */ 458 v = bmat->a; jj = bmat->j; 459 for (brow=0; brow<mbs; brow++) { 460 grow = bs*(rstart + brow); 461 nz = bmat->i[brow+1] - bmat->i[brow]; 462 for (bcol=0; bcol<nz; bcol++) { 463 gcol = bs*garray[*jj]; jj++; 464 for (col=0; col<bs; col++) { 465 for (row=0; row<bs; row++) { 466 vabs = PetscAbsScalar(*v); v++; 467 rsum[gcol+col] += vabs; 468 rsum[grow+row] += vabs; 469 } 470 } 471 } 472 } 473 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 474 *norm = 0.0; 475 for (col=0; col<mat->cmap->N; col++) { 476 if (rsum2[col] > *norm) *norm = rsum2[col]; 477 } 478 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 479 } else { 480 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 481 } 482 } 483 PetscFunctionReturn(0); 484 } 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 488 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 489 { 490 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 491 PetscErrorCode ierr; 492 PetscInt nstash,reallocs; 493 InsertMode addv; 494 495 PetscFunctionBegin; 496 if (baij->donotstash || mat->nooffprocentries) { 497 PetscFunctionReturn(0); 498 } 499 500 /* make sure all processors are either in INSERTMODE or ADDMODE */ 501 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 502 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 503 mat->insertmode = addv; /* in case this processor had no cache */ 504 505 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 506 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 507 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 508 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 509 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 510 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 #undef __FUNCT__ 515 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 516 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 517 { 518 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 519 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 520 PetscErrorCode ierr; 521 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 522 PetscInt *row,*col; 523 PetscBool other_disassembled; 524 PetscMPIInt n; 525 PetscBool r1,r2,r3; 526 MatScalar *val; 527 InsertMode addv = mat->insertmode; 528 529 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 530 PetscFunctionBegin; 531 532 if (!baij->donotstash && !mat->nooffprocentries) { 533 while (1) { 534 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 535 if (!flg) break; 536 537 for (i=0; i<n;) { 538 /* Now identify the consecutive vals belonging to the same row */ 539 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 540 if (j < n) ncols = j-i; 541 else ncols = n-i; 542 /* Now assemble all these values with a single function call */ 543 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 544 i = j; 545 } 546 } 547 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 548 /* Now process the block-stash. Since the values are stashed column-oriented, 549 set the roworiented flag to column oriented, and after MatSetValues() 550 restore the original flags */ 551 r1 = baij->roworiented; 552 r2 = a->roworiented; 553 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 554 baij->roworiented = PETSC_FALSE; 555 a->roworiented = PETSC_FALSE; 556 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 557 while (1) { 558 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 559 if (!flg) break; 560 561 for (i=0; i<n;) { 562 /* Now identify the consecutive vals belonging to the same row */ 563 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 564 if (j < n) ncols = j-i; 565 else ncols = n-i; 566 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 567 i = j; 568 } 569 } 570 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 571 baij->roworiented = r1; 572 a->roworiented = r2; 573 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 574 } 575 576 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 577 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 578 579 /* determine if any processor has disassembled, if so we must 580 also disassemble ourselfs, in order that we may reassemble. */ 581 /* 582 if nonzero structure of submatrix B cannot change then we know that 583 no processor disassembled thus we can skip this stuff 584 */ 585 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 586 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 587 if (mat->was_assembled && !other_disassembled) { 588 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 589 } 590 } 591 592 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 593 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 594 } 595 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 596 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 597 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 598 599 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 600 baij->rowvalues = 0; 601 602 PetscFunctionReturn(0); 603 } 604 605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 608 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 609 { 610 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt bs = mat->rmap->bs; 613 PetscMPIInt size = baij->size,rank = baij->rank; 614 PetscBool iascii,isdraw; 615 PetscViewer sviewer; 616 PetscViewerFormat format; 617 618 PetscFunctionBegin; 619 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 620 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 621 if (iascii) { 622 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 623 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 624 MatInfo info; 625 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 626 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 629 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 630 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 631 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 632 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 633 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 634 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 635 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 636 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_INFO) { 639 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 642 PetscFunctionReturn(0); 643 } 644 } 645 646 if (isdraw) { 647 PetscDraw draw; 648 PetscBool isnull; 649 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 650 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 651 } 652 653 if (size == 1) { 654 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 655 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 656 } else { 657 /* assemble the entire matrix onto first processor. */ 658 Mat A; 659 Mat_SeqSBAIJ *Aloc; 660 Mat_SeqBAIJ *Bloc; 661 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 662 MatScalar *a; 663 664 /* Should this be the same type as mat? */ 665 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 666 if (!rank) { 667 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 668 } else { 669 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 670 } 671 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 672 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 673 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 674 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 675 676 /* copy over the A part */ 677 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 678 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 679 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 680 681 for (i=0; i<mbs; i++) { 682 rvals[0] = bs*(baij->rstartbs + i); 683 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 684 for (j=ai[i]; j<ai[i+1]; j++) { 685 col = (baij->cstartbs+aj[j])*bs; 686 for (k=0; k<bs; k++) { 687 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 688 col++; a += bs; 689 } 690 } 691 } 692 /* copy over the B part */ 693 Bloc = (Mat_SeqBAIJ*)baij->B->data; 694 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 695 for (i=0; i<mbs; i++) { 696 697 rvals[0] = bs*(baij->rstartbs + i); 698 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 699 for (j=ai[i]; j<ai[i+1]; j++) { 700 col = baij->garray[aj[j]]*bs; 701 for (k=0; k<bs; k++) { 702 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 703 col++; a += bs; 704 } 705 } 706 } 707 ierr = PetscFree(rvals);CHKERRQ(ierr); 708 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 710 /* 711 Everyone has to call to draw the matrix since the graphics waits are 712 synchronized across all processors that share the PetscDraw object 713 */ 714 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 715 if (!rank) { 716 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 717 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 718 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 719 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 720 } 721 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 722 ierr = MatDestroy(&A);CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatView_MPISBAIJ" 729 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 730 { 731 PetscErrorCode ierr; 732 PetscBool iascii,isdraw,issocket,isbinary; 733 734 PetscFunctionBegin; 735 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 736 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 737 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 739 if (iascii || isdraw || issocket || isbinary) { 740 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 741 } else { 742 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 743 } 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatDestroy_MPISBAIJ" 749 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 750 { 751 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 #if defined(PETSC_USE_LOG) 756 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 757 #endif 758 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 759 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 760 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 761 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 762 #if defined (PETSC_USE_CTABLE) 763 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 764 #else 765 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 766 #endif 767 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 768 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 769 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 770 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 771 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 772 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 773 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 774 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 775 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 776 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 777 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 778 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 779 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 780 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 781 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 782 #if defined(PETSC_USE_REAL_MAT_SINGLE) 783 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 784 #endif 785 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 786 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 788 ierr = PetscFree(mat->data);CHKERRQ(ierr); 789 790 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNCT__ 800 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 801 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 802 { 803 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 804 PetscErrorCode ierr; 805 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 806 PetscScalar *x,*from; 807 808 PetscFunctionBegin; 809 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 810 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 811 812 /* diagonal part */ 813 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 814 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 815 816 /* subdiagonal part */ 817 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 818 819 /* copy x into the vec slvec0 */ 820 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 824 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 825 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 826 827 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 /* supperdiagonal part */ 830 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatMult_MPISBAIJ" 836 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 837 { 838 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 839 PetscErrorCode ierr; 840 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 841 PetscScalar *x,*from; 842 843 PetscFunctionBegin; 844 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 845 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 846 847 /* diagonal part */ 848 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 849 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 850 851 /* subdiagonal part */ 852 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 853 854 /* copy x into the vec slvec0 */ 855 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 859 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 860 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 861 862 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 /* supperdiagonal part */ 865 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 871 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 872 { 873 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 874 PetscErrorCode ierr; 875 PetscInt nt; 876 877 PetscFunctionBegin; 878 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 879 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 880 881 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 882 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 883 884 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 /* do diagonal part */ 886 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 887 /* do supperdiagonal part */ 888 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 890 /* do subdiagonal part */ 891 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 892 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 893 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 894 895 PetscFunctionReturn(0); 896 } 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 900 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 901 { 902 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 903 PetscErrorCode ierr; 904 PetscInt mbs=a->mbs,bs=A->rmap->bs; 905 PetscScalar *x,*from,zero=0.0; 906 907 PetscFunctionBegin; 908 /* 909 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 910 PetscSynchronizedFlush(((PetscObject)A)->comm); 911 */ 912 /* diagonal part */ 913 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 914 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 915 916 /* subdiagonal part */ 917 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 918 919 /* copy x into the vec slvec0 */ 920 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 921 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 922 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 923 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 924 925 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 927 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 929 /* supperdiagonal part */ 930 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 931 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 937 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 938 { 939 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 940 PetscErrorCode ierr; 941 942 PetscFunctionBegin; 943 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 /* do diagonal part */ 945 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 946 /* do supperdiagonal part */ 947 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 949 950 /* do subdiagonal part */ 951 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 952 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 953 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 954 955 PetscFunctionReturn(0); 956 } 957 958 /* 959 This only works correctly for square matrices where the subblock A->A is the 960 diagonal block 961 */ 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 964 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 965 { 966 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 971 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatScale_MPISBAIJ" 977 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 978 { 979 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 984 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } 987 988 #undef __FUNCT__ 989 #define __FUNCT__ "MatGetRow_MPISBAIJ" 990 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 991 { 992 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 993 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 994 PetscErrorCode ierr; 995 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 996 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 997 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 998 999 PetscFunctionBegin; 1000 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1001 mat->getrowactive = PETSC_TRUE; 1002 1003 if (!mat->rowvalues && (idx || v)) { 1004 /* 1005 allocate enough space to hold information from the longest row. 1006 */ 1007 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1008 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1009 PetscInt max = 1,mbs = mat->mbs,tmp; 1010 for (i=0; i<mbs; i++) { 1011 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1012 if (max < tmp) { max = tmp; } 1013 } 1014 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1015 } 1016 1017 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1018 lrow = row - brstart; /* local row index */ 1019 1020 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1021 if (!v) {pvA = 0; pvB = 0;} 1022 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1023 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1024 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1025 nztot = nzA + nzB; 1026 1027 cmap = mat->garray; 1028 if (v || idx) { 1029 if (nztot) { 1030 /* Sort by increasing column numbers, assuming A and B already sorted */ 1031 PetscInt imark = -1; 1032 if (v) { 1033 *v = v_p = mat->rowvalues; 1034 for (i=0; i<nzB; i++) { 1035 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1036 else break; 1037 } 1038 imark = i; 1039 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1040 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1041 } 1042 if (idx) { 1043 *idx = idx_p = mat->rowindices; 1044 if (imark > -1) { 1045 for (i=0; i<imark; i++) { 1046 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1047 } 1048 } else { 1049 for (i=0; i<nzB; i++) { 1050 if (cmap[cworkB[i]/bs] < cstart) 1051 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1052 else break; 1053 } 1054 imark = i; 1055 } 1056 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1057 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1058 } 1059 } else { 1060 if (idx) *idx = 0; 1061 if (v) *v = 0; 1062 } 1063 } 1064 *nz = nztot; 1065 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1066 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1072 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1073 { 1074 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1075 1076 PetscFunctionBegin; 1077 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1078 baij->getrowactive = PETSC_FALSE; 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1084 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1085 { 1086 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1087 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1088 1089 PetscFunctionBegin; 1090 aA->getrow_utriangular = PETSC_TRUE; 1091 PetscFunctionReturn(0); 1092 } 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1095 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1096 { 1097 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1098 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1099 1100 PetscFunctionBegin; 1101 aA->getrow_utriangular = PETSC_FALSE; 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1107 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1108 { 1109 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1114 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1120 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1121 { 1122 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1127 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1133 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1134 { 1135 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1140 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1146 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1147 { 1148 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1149 Mat A = a->A,B = a->B; 1150 PetscErrorCode ierr; 1151 PetscReal isend[5],irecv[5]; 1152 1153 PetscFunctionBegin; 1154 info->block_size = (PetscReal)matin->rmap->bs; 1155 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1156 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1157 isend[3] = info->memory; isend[4] = info->mallocs; 1158 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1159 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1160 isend[3] += info->memory; isend[4] += info->mallocs; 1161 if (flag == MAT_LOCAL) { 1162 info->nz_used = isend[0]; 1163 info->nz_allocated = isend[1]; 1164 info->nz_unneeded = isend[2]; 1165 info->memory = isend[3]; 1166 info->mallocs = isend[4]; 1167 } else if (flag == MAT_GLOBAL_MAX) { 1168 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1169 info->nz_used = irecv[0]; 1170 info->nz_allocated = irecv[1]; 1171 info->nz_unneeded = irecv[2]; 1172 info->memory = irecv[3]; 1173 info->mallocs = irecv[4]; 1174 } else if (flag == MAT_GLOBAL_SUM) { 1175 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1176 info->nz_used = irecv[0]; 1177 info->nz_allocated = irecv[1]; 1178 info->nz_unneeded = irecv[2]; 1179 info->memory = irecv[3]; 1180 info->mallocs = irecv[4]; 1181 } else { 1182 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1183 } 1184 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1185 info->fill_ratio_needed = 0; 1186 info->factor_mallocs = 0; 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1192 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1193 { 1194 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1195 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1196 PetscErrorCode ierr; 1197 1198 PetscFunctionBegin; 1199 switch (op) { 1200 case MAT_NEW_NONZERO_LOCATIONS: 1201 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1202 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1203 case MAT_KEEP_NONZERO_PATTERN: 1204 case MAT_NEW_NONZERO_LOCATION_ERR: 1205 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1206 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1207 break; 1208 case MAT_ROW_ORIENTED: 1209 a->roworiented = flg; 1210 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1211 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1212 break; 1213 case MAT_NEW_DIAGONALS: 1214 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1215 break; 1216 case MAT_IGNORE_OFF_PROC_ENTRIES: 1217 a->donotstash = flg; 1218 break; 1219 case MAT_USE_HASH_TABLE: 1220 a->ht_flag = flg; 1221 break; 1222 case MAT_HERMITIAN: 1223 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1224 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1225 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1226 break; 1227 case MAT_SPD: 1228 A->spd_set = PETSC_TRUE; 1229 A->spd = flg; 1230 if (flg) { 1231 A->symmetric = PETSC_TRUE; 1232 A->structurally_symmetric = PETSC_TRUE; 1233 A->symmetric_set = PETSC_TRUE; 1234 A->structurally_symmetric_set = PETSC_TRUE; 1235 } 1236 break; 1237 case MAT_SYMMETRIC: 1238 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1239 break; 1240 case MAT_STRUCTURALLY_SYMMETRIC: 1241 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1242 break; 1243 case MAT_SYMMETRY_ETERNAL: 1244 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1245 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1246 break; 1247 case MAT_IGNORE_LOWER_TRIANGULAR: 1248 aA->ignore_ltriangular = flg; 1249 break; 1250 case MAT_ERROR_LOWER_TRIANGULAR: 1251 aA->ignore_ltriangular = flg; 1252 break; 1253 case MAT_GETROW_UPPERTRIANGULAR: 1254 aA->getrow_utriangular = flg; 1255 break; 1256 default: 1257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1264 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1265 { 1266 PetscErrorCode ierr; 1267 PetscFunctionBegin; 1268 if (MAT_INITIAL_MATRIX || *B != A) { 1269 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1270 } 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #undef __FUNCT__ 1275 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1276 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1277 { 1278 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1279 Mat a=baij->A, b=baij->B; 1280 PetscErrorCode ierr; 1281 PetscInt nv,m,n; 1282 PetscBool flg; 1283 1284 PetscFunctionBegin; 1285 if (ll != rr) { 1286 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1287 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1288 } 1289 if (!ll) PetscFunctionReturn(0); 1290 1291 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1292 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1293 1294 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1295 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1296 1297 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1298 1299 /* left diagonalscale the off-diagonal part */ 1300 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1301 1302 /* scale the diagonal part */ 1303 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1304 1305 /* right diagonalscale the off-diagonal part */ 1306 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1313 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1314 { 1315 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatEqual_MPISBAIJ" 1327 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1328 { 1329 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1330 Mat a,b,c,d; 1331 PetscBool flg; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 a = matA->A; b = matA->B; 1336 c = matB->A; d = matB->B; 1337 1338 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1339 if (flg) { 1340 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1341 } 1342 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatCopy_MPISBAIJ" 1348 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1349 { 1350 PetscErrorCode ierr; 1351 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1352 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1353 1354 PetscFunctionBegin; 1355 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1356 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1357 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1358 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1359 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1360 } else { 1361 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1362 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1369 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1370 { 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1380 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1381 { 1382 PetscErrorCode ierr; 1383 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1384 PetscBLASInt bnz,one=1; 1385 Mat_SeqSBAIJ *xa,*ya; 1386 Mat_SeqBAIJ *xb,*yb; 1387 1388 PetscFunctionBegin; 1389 if (str == SAME_NONZERO_PATTERN) { 1390 PetscScalar alpha = a; 1391 xa = (Mat_SeqSBAIJ *)xx->A->data; 1392 ya = (Mat_SeqSBAIJ *)yy->A->data; 1393 bnz = PetscBLASIntCast(xa->nz); 1394 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1395 xb = (Mat_SeqBAIJ *)xx->B->data; 1396 yb = (Mat_SeqBAIJ *)yy->B->data; 1397 bnz = PetscBLASIntCast(xb->nz); 1398 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1399 } else { 1400 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1401 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1402 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1403 } 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1409 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1410 { 1411 PetscErrorCode ierr; 1412 PetscInt i; 1413 PetscBool flg; 1414 1415 PetscFunctionBegin; 1416 for (i=0; i<n; i++) { 1417 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1418 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1419 } 1420 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 1425 /* -------------------------------------------------------------------*/ 1426 static struct _MatOps MatOps_Values = { 1427 MatSetValues_MPISBAIJ, 1428 MatGetRow_MPISBAIJ, 1429 MatRestoreRow_MPISBAIJ, 1430 MatMult_MPISBAIJ, 1431 /* 4*/ MatMultAdd_MPISBAIJ, 1432 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1433 MatMultAdd_MPISBAIJ, 1434 0, 1435 0, 1436 0, 1437 /*10*/ 0, 1438 0, 1439 0, 1440 MatSOR_MPISBAIJ, 1441 MatTranspose_MPISBAIJ, 1442 /*15*/ MatGetInfo_MPISBAIJ, 1443 MatEqual_MPISBAIJ, 1444 MatGetDiagonal_MPISBAIJ, 1445 MatDiagonalScale_MPISBAIJ, 1446 MatNorm_MPISBAIJ, 1447 /*20*/ MatAssemblyBegin_MPISBAIJ, 1448 MatAssemblyEnd_MPISBAIJ, 1449 MatSetOption_MPISBAIJ, 1450 MatZeroEntries_MPISBAIJ, 1451 /*24*/ 0, 1452 0, 1453 0, 1454 0, 1455 0, 1456 /*29*/ MatSetUp_MPISBAIJ, 1457 0, 1458 0, 1459 0, 1460 0, 1461 /*34*/ MatDuplicate_MPISBAIJ, 1462 0, 1463 0, 1464 0, 1465 0, 1466 /*39*/ MatAXPY_MPISBAIJ, 1467 MatGetSubMatrices_MPISBAIJ, 1468 MatIncreaseOverlap_MPISBAIJ, 1469 MatGetValues_MPISBAIJ, 1470 MatCopy_MPISBAIJ, 1471 /*44*/ 0, 1472 MatScale_MPISBAIJ, 1473 0, 1474 0, 1475 0, 1476 /*49*/ 0, 1477 0, 1478 0, 1479 0, 1480 0, 1481 /*54*/ 0, 1482 0, 1483 MatSetUnfactored_MPISBAIJ, 1484 0, 1485 MatSetValuesBlocked_MPISBAIJ, 1486 /*59*/ 0, 1487 0, 1488 0, 1489 0, 1490 0, 1491 /*64*/ 0, 1492 0, 1493 0, 1494 0, 1495 0, 1496 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1497 0, 1498 0, 1499 0, 1500 0, 1501 /*74*/ 0, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*79*/ 0, 1507 0, 1508 0, 1509 0, 1510 MatLoad_MPISBAIJ, 1511 /*84*/ 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*89*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*94*/ 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /*99*/ 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 /*104*/0, 1532 MatRealPart_MPISBAIJ, 1533 MatImaginaryPart_MPISBAIJ, 1534 MatGetRowUpperTriangular_MPISBAIJ, 1535 MatRestoreRowUpperTriangular_MPISBAIJ, 1536 /*109*/0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 /*114*/0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*119*/0, 1547 0, 1548 0, 1549 0 1550 }; 1551 1552 1553 EXTERN_C_BEGIN 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1556 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1557 { 1558 PetscFunctionBegin; 1559 *a = ((Mat_MPISBAIJ *)A->data)->A; 1560 PetscFunctionReturn(0); 1561 } 1562 EXTERN_C_END 1563 1564 EXTERN_C_BEGIN 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1567 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1568 { 1569 Mat_MPISBAIJ *b; 1570 PetscErrorCode ierr; 1571 PetscInt i,mbs,Mbs; 1572 1573 PetscFunctionBegin; 1574 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1575 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1576 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1577 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1578 1579 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1580 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1581 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1582 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1583 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1584 1585 if (d_nnz) { 1586 for (i=0; i<B->rmap->n/bs; i++) { 1587 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1588 } 1589 } 1590 if (o_nnz) { 1591 for (i=0; i<B->rmap->n/bs; i++) { 1592 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1593 } 1594 } 1595 1596 b = (Mat_MPISBAIJ*)B->data; 1597 mbs = B->rmap->n/bs; 1598 Mbs = B->rmap->N/bs; 1599 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1600 1601 B->rmap->bs = bs; 1602 b->bs2 = bs*bs; 1603 b->mbs = mbs; 1604 b->nbs = mbs; 1605 b->Mbs = Mbs; 1606 b->Nbs = Mbs; 1607 1608 for (i=0; i<=b->size; i++) { 1609 b->rangebs[i] = B->rmap->range[i]/bs; 1610 } 1611 b->rstartbs = B->rmap->rstart/bs; 1612 b->rendbs = B->rmap->rend/bs; 1613 1614 b->cstartbs = B->cmap->rstart/bs; 1615 b->cendbs = B->cmap->rend/bs; 1616 1617 if (!B->preallocated) { 1618 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1619 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1620 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1621 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1622 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1623 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1624 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1625 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1626 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1627 } 1628 1629 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1630 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1631 B->preallocated = PETSC_TRUE; 1632 PetscFunctionReturn(0); 1633 } 1634 EXTERN_C_END 1635 1636 EXTERN_C_BEGIN 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1639 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1640 { 1641 PetscInt m,rstart,cstart,cend; 1642 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1643 const PetscInt *JJ=0; 1644 PetscScalar *values=0; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 1649 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1650 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1651 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1652 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1653 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1654 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1655 m = B->rmap->n/bs; 1656 rstart = B->rmap->rstart/bs; 1657 cstart = B->cmap->rstart/bs; 1658 cend = B->cmap->rend/bs; 1659 1660 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1661 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1662 for (i=0; i<m; i++) { 1663 nz = ii[i+1] - ii[i]; 1664 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1665 nz_max = PetscMax(nz_max,nz); 1666 JJ = jj + ii[i]; 1667 for (j=0; j<nz; j++) { 1668 if (*JJ >= cstart) break; 1669 JJ++; 1670 } 1671 d = 0; 1672 for (; j<nz; j++) { 1673 if (*JJ++ >= cend) break; 1674 d++; 1675 } 1676 d_nnz[i] = d; 1677 o_nnz[i] = nz - d; 1678 } 1679 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1680 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1681 1682 values = (PetscScalar*)V; 1683 if (!values) { 1684 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1685 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1686 } 1687 for (i=0; i<m; i++) { 1688 PetscInt row = i + rstart; 1689 PetscInt ncols = ii[i+1] - ii[i]; 1690 const PetscInt *icols = jj + ii[i]; 1691 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1692 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1693 } 1694 1695 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1696 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1698 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 EXTERN_C_END 1702 1703 EXTERN_C_BEGIN 1704 #if defined(PETSC_HAVE_MUMPS) 1705 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1706 #endif 1707 #if defined(PETSC_HAVE_PASTIX) 1708 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1709 #endif 1710 EXTERN_C_END 1711 1712 /*MC 1713 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1714 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1715 the matrix is stored. 1716 1717 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1718 can call MatSetOption(Mat, MAT_HERMITIAN); 1719 1720 Options Database Keys: 1721 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1722 1723 Level: beginner 1724 1725 .seealso: MatCreateMPISBAIJ 1726 M*/ 1727 1728 EXTERN_C_BEGIN 1729 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1730 EXTERN_C_END 1731 1732 EXTERN_C_BEGIN 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatCreate_MPISBAIJ" 1735 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1736 { 1737 Mat_MPISBAIJ *b; 1738 PetscErrorCode ierr; 1739 PetscBool flg; 1740 1741 PetscFunctionBegin; 1742 1743 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1744 B->data = (void*)b; 1745 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1746 1747 B->ops->destroy = MatDestroy_MPISBAIJ; 1748 B->ops->view = MatView_MPISBAIJ; 1749 B->assembled = PETSC_FALSE; 1750 1751 B->insertmode = NOT_SET_VALUES; 1752 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1753 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1754 1755 /* build local table of row and column ownerships */ 1756 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1757 1758 /* build cache for off array entries formed */ 1759 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1760 b->donotstash = PETSC_FALSE; 1761 b->colmap = PETSC_NULL; 1762 b->garray = PETSC_NULL; 1763 b->roworiented = PETSC_TRUE; 1764 1765 /* stuff used in block assembly */ 1766 b->barray = 0; 1767 1768 /* stuff used for matrix vector multiply */ 1769 b->lvec = 0; 1770 b->Mvctx = 0; 1771 b->slvec0 = 0; 1772 b->slvec0b = 0; 1773 b->slvec1 = 0; 1774 b->slvec1a = 0; 1775 b->slvec1b = 0; 1776 b->sMvctx = 0; 1777 1778 /* stuff for MatGetRow() */ 1779 b->rowindices = 0; 1780 b->rowvalues = 0; 1781 b->getrowactive = PETSC_FALSE; 1782 1783 /* hash table stuff */ 1784 b->ht = 0; 1785 b->hd = 0; 1786 b->ht_size = 0; 1787 b->ht_flag = PETSC_FALSE; 1788 b->ht_fact = 0; 1789 b->ht_total_ct = 0; 1790 b->ht_insert_ct = 0; 1791 1792 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1793 b->ijonly = PETSC_FALSE; 1794 1795 b->in_loc = 0; 1796 b->v_loc = 0; 1797 b->n_loc = 0; 1798 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1799 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1800 if (flg) { 1801 PetscReal fact = 1.39; 1802 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1803 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1804 if (fact <= 1.0) fact = 1.39; 1805 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1806 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1807 } 1808 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1809 1810 #if defined(PETSC_HAVE_PASTIX) 1811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1812 "MatGetFactor_mpisbaij_pastix", 1813 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1814 #endif 1815 #if defined(PETSC_HAVE_MUMPS) 1816 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1817 "MatGetFactor_sbaij_mumps", 1818 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1819 #endif 1820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1821 "MatStoreValues_MPISBAIJ", 1822 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1823 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1824 "MatRetrieveValues_MPISBAIJ", 1825 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1826 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1827 "MatGetDiagonalBlock_MPISBAIJ", 1828 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1830 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1831 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1832 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1833 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1834 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C", 1836 "MatConvert_MPISBAIJ_MPISBSTRM", 1837 MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1838 1839 B->symmetric = PETSC_TRUE; 1840 B->structurally_symmetric = PETSC_TRUE; 1841 B->symmetric_set = PETSC_TRUE; 1842 B->structurally_symmetric_set = PETSC_TRUE; 1843 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 EXTERN_C_END 1847 1848 /*MC 1849 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1850 1851 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1852 and MATMPISBAIJ otherwise. 1853 1854 Options Database Keys: 1855 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1856 1857 Level: beginner 1858 1859 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1860 M*/ 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1864 /*@C 1865 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1866 the user should preallocate the matrix storage by setting the parameters 1867 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1868 performance can be increased by more than a factor of 50. 1869 1870 Collective on Mat 1871 1872 Input Parameters: 1873 + A - the matrix 1874 . bs - size of blockk 1875 . d_nz - number of block nonzeros per block row in diagonal portion of local 1876 submatrix (same for all local rows) 1877 . d_nnz - array containing the number of block nonzeros in the various block rows 1878 in the upper triangular and diagonal part of the in diagonal portion of the local 1879 (possibly different for each block row) or PETSC_NULL. If you plan to factor the matrix you must leave room 1880 for the diagonal entry and set a value even if it is zero. 1881 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1882 submatrix (same for all local rows). 1883 - o_nnz - array containing the number of nonzeros in the various block rows of the 1884 off-diagonal portion of the local submatrix that is right of the diagonal 1885 (possibly different for each block row) or PETSC_NULL. 1886 1887 1888 Options Database Keys: 1889 . -mat_no_unroll - uses code that does not unroll the loops in the 1890 block calculations (much slower) 1891 . -mat_block_size - size of the blocks to use 1892 1893 Notes: 1894 1895 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1896 than it must be used on all processors that share the object for that argument. 1897 1898 If the *_nnz parameter is given then the *_nz parameter is ignored 1899 1900 Storage Information: 1901 For a square global matrix we define each processor's diagonal portion 1902 to be its local rows and the corresponding columns (a square submatrix); 1903 each processor's off-diagonal portion encompasses the remainder of the 1904 local matrix (a rectangular submatrix). 1905 1906 The user can specify preallocated storage for the diagonal part of 1907 the local submatrix with either d_nz or d_nnz (not both). Set 1908 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1909 memory allocation. Likewise, specify preallocated storage for the 1910 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1911 1912 You can call MatGetInfo() to get information on how effective the preallocation was; 1913 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1914 You can also run with the option -info and look for messages with the string 1915 malloc in them to see if additional memory allocation was needed. 1916 1917 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1918 the figure below we depict these three local rows and all columns (0-11). 1919 1920 .vb 1921 0 1 2 3 4 5 6 7 8 9 10 11 1922 ------------------- 1923 row 3 | . . . d d d o o o o o o 1924 row 4 | . . . d d d o o o o o o 1925 row 5 | . . . d d d o o o o o o 1926 ------------------- 1927 .ve 1928 1929 Thus, any entries in the d locations are stored in the d (diagonal) 1930 submatrix, and any entries in the o locations are stored in the 1931 o (off-diagonal) submatrix. Note that the d matrix is stored in 1932 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1933 1934 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1935 plus the diagonal part of the d matrix, 1936 and o_nz should indicate the number of block nonzeros per row in the o matrix 1937 1938 In general, for PDE problems in which most nonzeros are near the diagonal, 1939 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1940 or you will get TERRIBLE performance; see the users' manual chapter on 1941 matrices. 1942 1943 Level: intermediate 1944 1945 .keywords: matrix, block, aij, compressed row, sparse, parallel 1946 1947 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 1948 @*/ 1949 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1950 { 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1955 PetscValidType(B,1); 1956 PetscValidLogicalCollectiveInt(B,bs,2); 1957 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatCreateSBAIJ" 1963 /*@C 1964 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1965 (block compressed row). For good matrix assembly performance 1966 the user should preallocate the matrix storage by setting the parameters 1967 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1968 performance can be increased by more than a factor of 50. 1969 1970 Collective on MPI_Comm 1971 1972 Input Parameters: 1973 + comm - MPI communicator 1974 . bs - size of blockk 1975 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1976 This value should be the same as the local size used in creating the 1977 y vector for the matrix-vector product y = Ax. 1978 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1979 This value should be the same as the local size used in creating the 1980 x vector for the matrix-vector product y = Ax. 1981 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1982 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1983 . d_nz - number of block nonzeros per block row in diagonal portion of local 1984 submatrix (same for all local rows) 1985 . d_nnz - array containing the number of block nonzeros in the various block rows 1986 in the upper triangular portion of the in diagonal portion of the local 1987 (possibly different for each block block row) or PETSC_NULL. 1988 If you plan to factor the matrix you must leave room for the diagonal entry and 1989 set its value even if it is zero. 1990 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1991 submatrix (same for all local rows). 1992 - o_nnz - array containing the number of nonzeros in the various block rows of the 1993 off-diagonal portion of the local submatrix (possibly different for 1994 each block row) or PETSC_NULL. 1995 1996 Output Parameter: 1997 . A - the matrix 1998 1999 Options Database Keys: 2000 . -mat_no_unroll - uses code that does not unroll the loops in the 2001 block calculations (much slower) 2002 . -mat_block_size - size of the blocks to use 2003 . -mat_mpi - use the parallel matrix data structures even on one processor 2004 (defaults to using SeqBAIJ format on one processor) 2005 2006 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2007 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2008 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2009 2010 Notes: 2011 The number of rows and columns must be divisible by blocksize. 2012 This matrix type does not support complex Hermitian operation. 2013 2014 The user MUST specify either the local or global matrix dimensions 2015 (possibly both). 2016 2017 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2018 than it must be used on all processors that share the object for that argument. 2019 2020 If the *_nnz parameter is given then the *_nz parameter is ignored 2021 2022 Storage Information: 2023 For a square global matrix we define each processor's diagonal portion 2024 to be its local rows and the corresponding columns (a square submatrix); 2025 each processor's off-diagonal portion encompasses the remainder of the 2026 local matrix (a rectangular submatrix). 2027 2028 The user can specify preallocated storage for the diagonal part of 2029 the local submatrix with either d_nz or d_nnz (not both). Set 2030 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2031 memory allocation. Likewise, specify preallocated storage for the 2032 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2033 2034 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2035 the figure below we depict these three local rows and all columns (0-11). 2036 2037 .vb 2038 0 1 2 3 4 5 6 7 8 9 10 11 2039 ------------------- 2040 row 3 | . . . d d d o o o o o o 2041 row 4 | . . . d d d o o o o o o 2042 row 5 | . . . d d d o o o o o o 2043 ------------------- 2044 .ve 2045 2046 Thus, any entries in the d locations are stored in the d (diagonal) 2047 submatrix, and any entries in the o locations are stored in the 2048 o (off-diagonal) submatrix. Note that the d matrix is stored in 2049 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2050 2051 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2052 plus the diagonal part of the d matrix, 2053 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2054 In general, for PDE problems in which most nonzeros are near the diagonal, 2055 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2056 or you will get TERRIBLE performance; see the users' manual chapter on 2057 matrices. 2058 2059 Level: intermediate 2060 2061 .keywords: matrix, block, aij, compressed row, sparse, parallel 2062 2063 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2064 @*/ 2065 2066 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2067 { 2068 PetscErrorCode ierr; 2069 PetscMPIInt size; 2070 2071 PetscFunctionBegin; 2072 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2073 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2074 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2075 if (size > 1) { 2076 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2077 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2078 } else { 2079 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2080 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2081 } 2082 PetscFunctionReturn(0); 2083 } 2084 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2088 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2089 { 2090 Mat mat; 2091 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2092 PetscErrorCode ierr; 2093 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2094 PetscScalar *array; 2095 2096 PetscFunctionBegin; 2097 *newmat = 0; 2098 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2099 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2100 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2101 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2102 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2103 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2104 2105 mat->factortype = matin->factortype; 2106 mat->preallocated = PETSC_TRUE; 2107 mat->assembled = PETSC_TRUE; 2108 mat->insertmode = NOT_SET_VALUES; 2109 2110 a = (Mat_MPISBAIJ*)mat->data; 2111 a->bs2 = oldmat->bs2; 2112 a->mbs = oldmat->mbs; 2113 a->nbs = oldmat->nbs; 2114 a->Mbs = oldmat->Mbs; 2115 a->Nbs = oldmat->Nbs; 2116 2117 2118 a->size = oldmat->size; 2119 a->rank = oldmat->rank; 2120 a->donotstash = oldmat->donotstash; 2121 a->roworiented = oldmat->roworiented; 2122 a->rowindices = 0; 2123 a->rowvalues = 0; 2124 a->getrowactive = PETSC_FALSE; 2125 a->barray = 0; 2126 a->rstartbs = oldmat->rstartbs; 2127 a->rendbs = oldmat->rendbs; 2128 a->cstartbs = oldmat->cstartbs; 2129 a->cendbs = oldmat->cendbs; 2130 2131 /* hash table stuff */ 2132 a->ht = 0; 2133 a->hd = 0; 2134 a->ht_size = 0; 2135 a->ht_flag = oldmat->ht_flag; 2136 a->ht_fact = oldmat->ht_fact; 2137 a->ht_total_ct = 0; 2138 a->ht_insert_ct = 0; 2139 2140 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2141 if (oldmat->colmap) { 2142 #if defined (PETSC_USE_CTABLE) 2143 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2144 #else 2145 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2146 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2147 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2148 #endif 2149 } else a->colmap = 0; 2150 2151 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2152 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2153 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2154 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2155 } else a->garray = 0; 2156 2157 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2158 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2159 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2160 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2161 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2162 2163 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2164 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2165 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2166 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2167 2168 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2169 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2170 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2171 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2172 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2173 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2174 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2175 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2176 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2177 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2178 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2179 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2180 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2181 2182 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2183 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2184 a->sMvctx = oldmat->sMvctx; 2185 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2186 2187 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2188 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2189 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2190 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2191 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2192 *newmat = mat; 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatLoad_MPISBAIJ" 2198 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2199 { 2200 PetscErrorCode ierr; 2201 PetscInt i,nz,j,rstart,rend; 2202 PetscScalar *vals,*buf; 2203 MPI_Comm comm = ((PetscObject)viewer)->comm; 2204 MPI_Status status; 2205 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2206 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2207 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2208 PetscInt bs=1,Mbs,mbs,extra_rows; 2209 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2210 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2211 int fd; 2212 2213 PetscFunctionBegin; 2214 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2215 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2216 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2217 2218 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2219 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2220 if (!rank) { 2221 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2222 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2223 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2224 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2225 } 2226 2227 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2228 2229 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2230 M = header[1]; N = header[2]; 2231 2232 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2233 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2234 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2235 2236 /* If global sizes are set, check if they are consistent with that given in the file */ 2237 if (sizesset) { 2238 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2239 } 2240 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2241 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2242 2243 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2244 2245 /* 2246 This code adds extra rows to make sure the number of rows is 2247 divisible by the blocksize 2248 */ 2249 Mbs = M/bs; 2250 extra_rows = bs - M + bs*(Mbs); 2251 if (extra_rows == bs) extra_rows = 0; 2252 else Mbs++; 2253 if (extra_rows &&!rank) { 2254 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2255 } 2256 2257 /* determine ownership of all rows */ 2258 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2259 mbs = Mbs/size + ((Mbs % size) > rank); 2260 m = mbs*bs; 2261 } else { /* User Set */ 2262 m = newmat->rmap->n; 2263 mbs = m/bs; 2264 } 2265 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2266 mmbs = PetscMPIIntCast(mbs); 2267 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2268 rowners[0] = 0; 2269 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2270 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2271 rstart = rowners[rank]; 2272 rend = rowners[rank+1]; 2273 2274 /* distribute row lengths to all processors */ 2275 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2276 if (!rank) { 2277 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2278 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2279 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2280 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2281 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2282 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2283 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2284 } else { 2285 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2286 } 2287 2288 if (!rank) { /* procs[0] */ 2289 /* calculate the number of nonzeros on each processor */ 2290 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2291 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2292 for (i=0; i<size; i++) { 2293 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2294 procsnz[i] += rowlengths[j]; 2295 } 2296 } 2297 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2298 2299 /* determine max buffer needed and allocate it */ 2300 maxnz = 0; 2301 for (i=0; i<size; i++) { 2302 maxnz = PetscMax(maxnz,procsnz[i]); 2303 } 2304 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2305 2306 /* read in my part of the matrix column indices */ 2307 nz = procsnz[0]; 2308 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2309 mycols = ibuf; 2310 if (size == 1) nz -= extra_rows; 2311 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2312 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2313 2314 /* read in every ones (except the last) and ship off */ 2315 for (i=1; i<size-1; i++) { 2316 nz = procsnz[i]; 2317 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2318 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2319 } 2320 /* read in the stuff for the last proc */ 2321 if (size != 1) { 2322 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2323 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2324 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2325 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2326 } 2327 ierr = PetscFree(cols);CHKERRQ(ierr); 2328 } else { /* procs[i], i>0 */ 2329 /* determine buffer space needed for message */ 2330 nz = 0; 2331 for (i=0; i<m; i++) { 2332 nz += locrowlens[i]; 2333 } 2334 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2335 mycols = ibuf; 2336 /* receive message of column indices*/ 2337 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2338 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2339 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2340 } 2341 2342 /* loop over local rows, determining number of off diagonal entries */ 2343 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2344 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2345 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2346 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2347 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2348 rowcount = 0; 2349 nzcount = 0; 2350 for (i=0; i<mbs; i++) { 2351 dcount = 0; 2352 odcount = 0; 2353 for (j=0; j<bs; j++) { 2354 kmax = locrowlens[rowcount]; 2355 for (k=0; k<kmax; k++) { 2356 tmp = mycols[nzcount++]/bs; /* block col. index */ 2357 if (!mask[tmp]) { 2358 mask[tmp] = 1; 2359 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2360 else masked1[dcount++] = tmp; /* entry in diag portion */ 2361 } 2362 } 2363 rowcount++; 2364 } 2365 2366 dlens[i] = dcount; /* d_nzz[i] */ 2367 odlens[i] = odcount; /* o_nzz[i] */ 2368 2369 /* zero out the mask elements we set */ 2370 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2371 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2372 } 2373 if (!sizesset) { 2374 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2375 } 2376 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2377 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2378 2379 if (!rank) { 2380 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2381 /* read in my part of the matrix numerical values */ 2382 nz = procsnz[0]; 2383 vals = buf; 2384 mycols = ibuf; 2385 if (size == 1) nz -= extra_rows; 2386 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2387 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2388 2389 /* insert into matrix */ 2390 jj = rstart*bs; 2391 for (i=0; i<m; i++) { 2392 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2393 mycols += locrowlens[i]; 2394 vals += locrowlens[i]; 2395 jj++; 2396 } 2397 2398 /* read in other processors (except the last one) and ship out */ 2399 for (i=1; i<size-1; i++) { 2400 nz = procsnz[i]; 2401 vals = buf; 2402 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2403 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2404 } 2405 /* the last proc */ 2406 if (size != 1) { 2407 nz = procsnz[i] - extra_rows; 2408 vals = buf; 2409 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2410 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2411 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2412 } 2413 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2414 2415 } else { 2416 /* receive numeric values */ 2417 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2418 2419 /* receive message of values*/ 2420 vals = buf; 2421 mycols = ibuf; 2422 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2423 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2424 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2425 2426 /* insert into matrix */ 2427 jj = rstart*bs; 2428 for (i=0; i<m; i++) { 2429 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2430 mycols += locrowlens[i]; 2431 vals += locrowlens[i]; 2432 jj++; 2433 } 2434 } 2435 2436 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2437 ierr = PetscFree(buf);CHKERRQ(ierr); 2438 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2439 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2440 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2441 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2442 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2443 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2449 /*XXXXX@ 2450 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2451 2452 Input Parameters: 2453 . mat - the matrix 2454 . fact - factor 2455 2456 Not Collective on Mat, each process can have a different hash factor 2457 2458 Level: advanced 2459 2460 Notes: 2461 This can also be set by the command line option: -mat_use_hash_table fact 2462 2463 .keywords: matrix, hashtable, factor, HT 2464 2465 .seealso: MatSetOption() 2466 @XXXXX*/ 2467 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2471 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2472 { 2473 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2474 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2475 PetscReal atmp; 2476 PetscReal *work,*svalues,*rvalues; 2477 PetscErrorCode ierr; 2478 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2479 PetscMPIInt rank,size; 2480 PetscInt *rowners_bs,dest,count,source; 2481 PetscScalar *va; 2482 MatScalar *ba; 2483 MPI_Status stat; 2484 2485 PetscFunctionBegin; 2486 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2487 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2488 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2489 2490 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2491 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2492 2493 bs = A->rmap->bs; 2494 mbs = a->mbs; 2495 Mbs = a->Mbs; 2496 ba = b->a; 2497 bi = b->i; 2498 bj = b->j; 2499 2500 /* find ownerships */ 2501 rowners_bs = A->rmap->range; 2502 2503 /* each proc creates an array to be distributed */ 2504 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2505 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2506 2507 /* row_max for B */ 2508 if (rank != size-1) { 2509 for (i=0; i<mbs; i++) { 2510 ncols = bi[1] - bi[0]; bi++; 2511 brow = bs*i; 2512 for (j=0; j<ncols; j++) { 2513 bcol = bs*(*bj); 2514 for (kcol=0; kcol<bs; kcol++) { 2515 col = bcol + kcol; /* local col index */ 2516 col += rowners_bs[rank+1]; /* global col index */ 2517 for (krow=0; krow<bs; krow++) { 2518 atmp = PetscAbsScalar(*ba); ba++; 2519 row = brow + krow; /* local row index */ 2520 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2521 if (work[col] < atmp) work[col] = atmp; 2522 } 2523 } 2524 bj++; 2525 } 2526 } 2527 2528 /* send values to its owners */ 2529 for (dest=rank+1; dest<size; dest++) { 2530 svalues = work + rowners_bs[dest]; 2531 count = rowners_bs[dest+1]-rowners_bs[dest]; 2532 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2533 } 2534 } 2535 2536 /* receive values */ 2537 if (rank) { 2538 rvalues = work; 2539 count = rowners_bs[rank+1]-rowners_bs[rank]; 2540 for (source=0; source<rank; source++) { 2541 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2542 /* process values */ 2543 for (i=0; i<count; i++) { 2544 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2545 } 2546 } 2547 } 2548 2549 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2550 ierr = PetscFree(work);CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatSOR_MPISBAIJ" 2556 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2557 { 2558 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2559 PetscErrorCode ierr; 2560 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2561 PetscScalar *x,*ptr,*from; 2562 Vec bb1; 2563 const PetscScalar *b; 2564 2565 PetscFunctionBegin; 2566 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2567 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2568 2569 if (flag == SOR_APPLY_UPPER) { 2570 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2575 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2576 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2577 its--; 2578 } 2579 2580 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2581 while (its--) { 2582 2583 /* lower triangular part: slvec0b = - B^T*xx */ 2584 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2585 2586 /* copy xx into slvec0a */ 2587 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2588 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2589 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2590 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2591 2592 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2593 2594 /* copy bb into slvec1a */ 2595 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2596 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2597 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2598 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2599 2600 /* set slvec1b = 0 */ 2601 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2602 2603 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2604 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2605 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2606 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2607 2608 /* upper triangular part: bb1 = bb1 - B*x */ 2609 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2610 2611 /* local diagonal sweep */ 2612 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2613 } 2614 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2615 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2616 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2617 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2618 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2619 } else if (flag & SOR_EISENSTAT) { 2620 Vec xx1; 2621 PetscBool hasop; 2622 const PetscScalar *diag; 2623 PetscScalar *sl,scale = (omega - 2.0)/omega; 2624 PetscInt i,n; 2625 2626 if (!mat->xx1) { 2627 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2628 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2629 } 2630 xx1 = mat->xx1; 2631 bb1 = mat->bb1; 2632 2633 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2634 2635 if (!mat->diag) { 2636 /* this is wrong for same matrix with new nonzero values */ 2637 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2638 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2639 } 2640 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2641 2642 if (hasop) { 2643 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2644 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2645 } else { 2646 /* 2647 These two lines are replaced by code that may be a bit faster for a good compiler 2648 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2649 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2650 */ 2651 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2652 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2653 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2654 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2655 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2656 if (omega == 1.0) { 2657 for (i=0; i<n; i++) { 2658 sl[i] = b[i] - diag[i]*x[i]; 2659 } 2660 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2661 } else { 2662 for (i=0; i<n; i++) { 2663 sl[i] = b[i] + scale*diag[i]*x[i]; 2664 } 2665 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2666 } 2667 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2668 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2669 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2670 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2671 } 2672 2673 /* multiply off-diagonal portion of matrix */ 2674 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2675 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2676 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2677 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2678 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2679 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2680 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2681 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2682 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2683 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2684 2685 /* local sweep */ 2686 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2687 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2688 } else { 2689 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2690 } 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2696 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2697 { 2698 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2699 PetscErrorCode ierr; 2700 Vec lvec1,bb1; 2701 2702 PetscFunctionBegin; 2703 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2704 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2705 2706 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2707 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2708 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2709 its--; 2710 } 2711 2712 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2713 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2714 while (its--) { 2715 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2716 2717 /* lower diagonal part: bb1 = bb - B^T*xx */ 2718 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2719 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2720 2721 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2722 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2723 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2724 2725 /* upper diagonal part: bb1 = bb1 - B*x */ 2726 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2727 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2728 2729 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2730 2731 /* diagonal sweep */ 2732 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2733 } 2734 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2735 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2736 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2737 PetscFunctionReturn(0); 2738 } 2739 2740 #undef __FUNCT__ 2741 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2742 /*@ 2743 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2744 CSR format the local rows. 2745 2746 Collective on MPI_Comm 2747 2748 Input Parameters: 2749 + comm - MPI communicator 2750 . bs - the block size, only a block size of 1 is supported 2751 . m - number of local rows (Cannot be PETSC_DECIDE) 2752 . n - This value should be the same as the local size used in creating the 2753 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2754 calculated if N is given) For square matrices n is almost always m. 2755 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2756 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2757 . i - row indices 2758 . j - column indices 2759 - a - matrix values 2760 2761 Output Parameter: 2762 . mat - the matrix 2763 2764 Level: intermediate 2765 2766 Notes: 2767 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2768 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2769 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2770 2771 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2772 2773 .keywords: matrix, aij, compressed row, sparse, parallel 2774 2775 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2776 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2777 @*/ 2778 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2779 { 2780 PetscErrorCode ierr; 2781 2782 2783 PetscFunctionBegin; 2784 if (i[0]) { 2785 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2786 } 2787 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2788 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2789 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2790 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2791 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2792 PetscFunctionReturn(0); 2793 } 2794 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2798 /*@C 2799 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2800 (the default parallel PETSc format). 2801 2802 Collective on MPI_Comm 2803 2804 Input Parameters: 2805 + A - the matrix 2806 . bs - the block size 2807 . i - the indices into j for the start of each local row (starts with zero) 2808 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2809 - v - optional values in the matrix 2810 2811 Level: developer 2812 2813 .keywords: matrix, aij, compressed row, sparse, parallel 2814 2815 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2816 @*/ 2817 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2818 { 2819 PetscErrorCode ierr; 2820 2821 PetscFunctionBegin; 2822 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2823 PetscFunctionReturn(0); 2824 } 2825 2826 2827