1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscblaslapack.h" 7 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 EXTERN_C_BEGIN 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 28 { 29 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 34 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN_C_BEGIN 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 43 { 44 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 49 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 EXTERN_C_END 53 54 55 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 56 { \ 57 \ 58 brow = row/bs; \ 59 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 60 rmax = aimax[brow]; nrow = ailen[brow]; \ 61 bcol = col/bs; \ 62 ridx = row % bs; cidx = col % bs; \ 63 low = 0; high = nrow; \ 64 while (high-low > 3) { \ 65 t = (low+high)/2; \ 66 if (rp[t] > bcol) high = t; \ 67 else low = t; \ 68 } \ 69 for (_i=low; _i<high; _i++) { \ 70 if (rp[_i] > bcol) break; \ 71 if (rp[_i] == bcol) { \ 72 bap = ap + bs2*_i + bs*cidx + ridx; \ 73 if (addv == ADD_VALUES) *bap += value; \ 74 else *bap = value; \ 75 goto a_noinsert; \ 76 } \ 77 } \ 78 if (a->nonew == 1) goto a_noinsert; \ 79 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 80 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 81 N = nrow++ - 1; \ 82 /* shift up all the later entries in this row */ \ 83 for (ii=N; ii>=_i; ii--) { \ 84 rp[ii+1] = rp[ii]; \ 85 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 86 } \ 87 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 88 rp[_i] = bcol; \ 89 ap[bs2*_i + bs*cidx + ridx] = value; \ 90 a_noinsert:; \ 91 ailen[brow] = nrow; \ 92 } 93 94 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 95 { \ 96 brow = row/bs; \ 97 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 98 rmax = bimax[brow]; nrow = bilen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto b_noinsert; \ 114 } \ 115 } \ 116 if (b->nonew == 1) goto b_noinsert; \ 117 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 118 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 119 N = nrow++ - 1; \ 120 /* shift up all the later entries in this row */ \ 121 for (ii=N; ii>=_i; ii--) { \ 122 rp[ii+1] = rp[ii]; \ 123 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 124 } \ 125 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 b_noinsert:; \ 129 bilen[brow] = nrow; \ 130 } 131 132 /* Only add/insert a(i,j) with i<=j (blocks). 133 Any a(i,j) with i>j input by user is ingored. 134 */ 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSetValues_MPISBAIJ" 137 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 138 { 139 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 140 MatScalar value; 141 PetscTruth roworiented = baij->roworiented; 142 PetscErrorCode ierr; 143 PetscInt i,j,row,col; 144 PetscInt rstart_orig=mat->rmap->rstart; 145 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 146 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 147 148 /* Some Variables required in the macro */ 149 Mat A = baij->A; 150 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 151 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 152 MatScalar *aa=a->a; 153 154 Mat B = baij->B; 155 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 156 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 157 MatScalar *ba=b->a; 158 159 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 160 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 161 MatScalar *ap,*bap; 162 163 /* for stash */ 164 PetscInt n_loc, *in_loc = PETSC_NULL; 165 MatScalar *v_loc = PETSC_NULL; 166 167 PetscFunctionBegin; 168 if (v) PetscValidScalarPointer(v,6); 169 if (!baij->donotstash){ 170 if (n > baij->n_loc) { 171 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 174 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 175 baij->n_loc = n; 176 } 177 in_loc = baij->in_loc; 178 v_loc = baij->v_loc; 179 } 180 181 for (i=0; i<m; i++) { 182 if (im[i] < 0) continue; 183 #if defined(PETSC_USE_DEBUG) 184 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",im[i],mat->rmap->N-1); 185 #endif 186 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 187 row = im[i] - rstart_orig; /* local row index */ 188 for (j=0; j<n; j++) { 189 if (im[i]/bs > in[j]/bs){ 190 if (a->ignore_ltriangular){ 191 continue; /* ignore lower triangular blocks */ 192 } else { 193 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 194 } 195 } 196 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 197 col = in[j] - cstart_orig; /* local col index */ 198 brow = row/bs; bcol = col/bs; 199 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 200 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } 232 } 233 } else { /* off processor entry */ 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscTruth roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = 
(m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;           /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;           /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
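/*
   Illustrative sketch of the user-level calling sequence that drives the stash
   traffic handled by MatAssemblyBegin_MPISBAIJ() above and MatAssemblyEnd_MPISBAIJ()
   below; `mat`, `rstart`, and `rend` are placeholders for the caller's matrix and
   ownership range:

       PetscInt    i;
       PetscScalar one = 1.0;
       for (i=rstart; i<rend; i++) {
         ierr = MatSetValues(mat,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries destined for other processes are collected in mat->stash (scalar entries)
   and mat->bstash (blocked entries) and exchanged between the Begin and End calls.
*/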
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 632 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 633 PetscFunctionReturn(0); 634 } else if (format == PETSC_VIEWER_ASCII_INFO) { 635 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 638 PetscFunctionReturn(0); 639 } 640 } 641 642 if (isdraw) { 643 PetscDraw draw; 644 PetscTruth isnull; 645 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 646 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 647 } 648 649 if (size == 1) { 650 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 651 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 652 } else { 653 /* assemble the entire matrix onto first processor. */ 654 Mat A; 655 Mat_SeqSBAIJ *Aloc; 656 Mat_SeqBAIJ *Bloc; 657 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 658 MatScalar *a; 659 660 /* Should this be the same type as mat? */ 661 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 662 if (!rank) { 663 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 664 } else { 665 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 666 } 667 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 668 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 669 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 670 671 /* copy over the A part */ 672 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 674 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 675 676 for (i=0; i<mbs; i++) { 677 rvals[0] = bs*(baij->rstartbs + i); 678 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 679 for (j=ai[i]; j<ai[i+1]; j++) { 680 col = (baij->cstartbs+aj[j])*bs; 681 for (k=0; k<bs; k++) { 682 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 683 col++; a += bs; 684 } 685 } 686 } 687 /* copy over the B part */ 688 Bloc = (Mat_SeqBAIJ*)baij->B->data; 689 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 690 for (i=0; i<mbs; i++) { 691 692 rvals[0] = bs*(baij->rstartbs + i); 693 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 694 for (j=ai[i]; j<ai[i+1]; j++) { 695 col = baij->garray[aj[j]]*bs; 696 for (k=0; k<bs; k++) { 697 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 698 col++; a += bs; 699 } 700 } 701 } 702 ierr = PetscFree(rvals);CHKERRQ(ierr); 703 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 /* 706 Everyone has to call to draw the matrix since the graphics waits are 707 synchronized across all processors that share the PetscDraw object 708 */ 709 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = 
PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 715 ierr = MatDestroy(A);CHKERRQ(ierr); 716 } 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatView_MPISBAIJ" 722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 723 { 724 PetscErrorCode ierr; 725 PetscTruth iascii,isdraw,issocket,isbinary; 726 727 PetscFunctionBegin; 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 732 if (iascii || isdraw || issocket || isbinary) { 733 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else { 735 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPISBAIJ" 742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 743 { 744 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 753 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 757 #else 758 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 759 #endif 760 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 761 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 762 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 763 if (baij->slvec0) { 764 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 765 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 766 } 767 if (baij->slvec1) { 768 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 771 } 772 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 773 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 774 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 775 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 776 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 777 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 778 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 779 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 780 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 781 #endif 782 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 785 ierr = PetscFree(baij);CHKERRQ(ierr); 786 787 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr 
= PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
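/*
   Informal sketch of the single-scatter scheme used by the two MatMult routines
   above; only the upper triangular blocks are stored, so y = A*x is assembled
   from three pieces (slvec0, slvec1a/slvec1b, and sMvctx are the Mat_MPISBAIJ
   fields referenced in the code):

     slvec0 = [ x ; B^T x ]        local x followed by the subdiagonal product
     slvec1 = [ A_diag x ; 0 ]     one forward scatter-add over sMvctx then
                                   accumulates remote B^T x terms into slvec1a
                                   and delivers the ghost values of x into slvec1b
     y      = B*slvec1b + slvec1a  superdiagonal part plus everything above

   The Hermitian variant differs only in applying multhermitiantranspose to B.
*/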
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
/* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square
matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll)
PetscFunctionReturn(0); 1276 1277 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1278 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1279 1280 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1281 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1282 1283 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 1285 /* left diagonalscale the off-diagonal part */ 1286 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1287 1288 /* scale the diagonal part */ 1289 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1290 1291 /* right diagonalscale the off-diagonal part */ 1292 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1299 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1300 { 1301 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1302 PetscErrorCode ierr; 1303 1304 PetscFunctionBegin; 1305 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatEqual_MPISBAIJ" 1313 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1314 { 1315 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1316 Mat a,b,c,d; 1317 PetscTruth flg; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 a = matA->A; b = matA->B; 1322 c = matB->A; d = matB->B; 1323 1324 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1325 if (flg) { 1326 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1327 } 1328 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatCopy_MPISBAIJ" 1334 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1335 { 1336 PetscErrorCode ierr; 1337 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1338 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1339 1340 PetscFunctionBegin; 1341 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1342 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1343 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1344 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1345 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1346 } else { 1347 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1348 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1355 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1356 { 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1366 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1367 { 1368 PetscErrorCode ierr; 1369 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1370 PetscBLASInt bnz,one=1; 1371 Mat_SeqSBAIJ *xa,*ya; 1372 Mat_SeqBAIJ *xb,*yb; 1373 1374 PetscFunctionBegin; 1375 if (str == SAME_NONZERO_PATTERN) { 1376 PetscScalar alpha = a; 1377 xa = (Mat_SeqSBAIJ *)xx->A->data; 1378 ya = (Mat_SeqSBAIJ *)yy->A->data; 1379 bnz = PetscBLASIntCast(xa->nz); 1380 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1381 xb = (Mat_SeqBAIJ *)xx->B->data; 1382 yb = (Mat_SeqBAIJ *)yy->B->data; 1383 bnz = PetscBLASIntCast(xb->nz); 1384 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1385 } else { 1386 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1387 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1388 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1395 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1396 { 1397 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1398 PetscInt rbs,cbs; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1403 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1404 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1405 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1406 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1407 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1414 { 1415 PetscErrorCode ierr; 1416 PetscInt i; 1417 PetscTruth flg; 1418 1419 PetscFunctionBegin; 1420 for (i=0; i<n; i++) { 1421 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1422 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1423 } 1424 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 1429 /* -------------------------------------------------------------------*/ 1430 static struct _MatOps MatOps_Values = { 1431 MatSetValues_MPISBAIJ, 1432 MatGetRow_MPISBAIJ, 1433 MatRestoreRow_MPISBAIJ, 1434 MatMult_MPISBAIJ, 1435 /* 4*/ MatMultAdd_MPISBAIJ, 1436 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1437 MatMultAdd_MPISBAIJ, 1438 0, 1439 0, 1440 0, 1441 /*10*/ 0, 1442 
0, 1443 0, 1444 MatSOR_MPISBAIJ, 1445 MatTranspose_MPISBAIJ, 1446 /*15*/ MatGetInfo_MPISBAIJ, 1447 MatEqual_MPISBAIJ, 1448 MatGetDiagonal_MPISBAIJ, 1449 MatDiagonalScale_MPISBAIJ, 1450 MatNorm_MPISBAIJ, 1451 /*20*/ MatAssemblyBegin_MPISBAIJ, 1452 MatAssemblyEnd_MPISBAIJ, 1453 MatSetOption_MPISBAIJ, 1454 MatZeroEntries_MPISBAIJ, 1455 /*24*/ 0, 1456 0, 1457 0, 1458 0, 1459 0, 1460 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1461 0, 1462 0, 1463 0, 1464 0, 1465 /*34*/ MatDuplicate_MPISBAIJ, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*39*/ MatAXPY_MPISBAIJ, 1471 MatGetSubMatrices_MPISBAIJ, 1472 MatIncreaseOverlap_MPISBAIJ, 1473 MatGetValues_MPISBAIJ, 1474 MatCopy_MPISBAIJ, 1475 /*44*/ 0, 1476 MatScale_MPISBAIJ, 1477 0, 1478 0, 1479 0, 1480 /*49*/ MatSetBlockSize_MPISBAIJ, 1481 0, 1482 0, 1483 0, 1484 0, 1485 /*54*/ 0, 1486 0, 1487 MatSetUnfactored_MPISBAIJ, 1488 0, 1489 MatSetValuesBlocked_MPISBAIJ, 1490 /*59*/ 0, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*64*/ 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*74*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*79*/ 0, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*84*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*89*/ 0, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*94*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*99*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*104*/0, 1536 MatRealPart_MPISBAIJ, 1537 MatImaginaryPart_MPISBAIJ, 1538 MatGetRowUpperTriangular_MPISBAIJ, 1539 MatRestoreRowUpperTriangular_MPISBAIJ, 1540 /*109*/0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*114*/0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*119*/0, 1551 0, 1552 0, 1553 0, 1554 MatLoadnew_MPISBAIJ 1555 }; 1556 1557 1558 EXTERN_C_BEGIN 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1561 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1562 { 1563 PetscFunctionBegin; 1564 *a = ((Mat_MPISBAIJ *)A->data)->A; 1565 *iscopy = PETSC_FALSE; 1566 PetscFunctionReturn(0); 1567 } 1568 EXTERN_C_END 1569 1570 EXTERN_C_BEGIN 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1573 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1574 { 1575 Mat_MPISBAIJ *b; 1576 PetscErrorCode ierr; 1577 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1578 1579 PetscFunctionBegin; 1580 if (bs < 0){ 1581 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1582 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1583 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1584 bs = PetscAbs(bs); 1585 } 1586 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1587 bs = newbs; 1588 1589 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1590 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1591 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1592 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1593 1594 B->rmap->bs = B->cmap->bs = bs; 1595 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1596 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1597 1598 if (d_nnz) { 1599 for (i=0; 
i<B->rmap->n/bs; i++) { 1600 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1601 } 1602 } 1603 if (o_nnz) { 1604 for (i=0; i<B->rmap->n/bs; i++) { 1605 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1606 } 1607 } 1608 1609 b = (Mat_MPISBAIJ*)B->data; 1610 mbs = B->rmap->n/bs; 1611 Mbs = B->rmap->N/bs; 1612 if (mbs*bs != B->rmap->n) { 1613 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs); 1614 } 1615 1616 B->rmap->bs = bs; 1617 b->bs2 = bs*bs; 1618 b->mbs = mbs; 1619 b->nbs = mbs; 1620 b->Mbs = Mbs; 1621 b->Nbs = Mbs; 1622 1623 for (i=0; i<=b->size; i++) { 1624 b->rangebs[i] = B->rmap->range[i]/bs; 1625 } 1626 b->rstartbs = B->rmap->rstart/bs; 1627 b->rendbs = B->rmap->rend/bs; 1628 1629 b->cstartbs = B->cmap->rstart/bs; 1630 b->cendbs = B->cmap->rend/bs; 1631 1632 if (!B->preallocated) { 1633 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1634 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1635 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1636 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1637 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1638 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1639 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1640 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1641 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1642 } 1643 1644 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1645 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1646 B->preallocated = PETSC_TRUE; 1647 PetscFunctionReturn(0); 1648 } 1649 EXTERN_C_END 1650 1651 EXTERN_C_BEGIN 1652 #if defined(PETSC_HAVE_MUMPS) 1653 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1654 #endif 1655 #if defined(PETSC_HAVE_SPOOLES) 1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1657 #endif 1658 #if defined(PETSC_HAVE_PASTIX) 1659 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1660 #endif 1661 EXTERN_C_END 1662 1663 /*MC 1664 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1665 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1666 1667 Options Database Keys: 1668 .
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1669 1670 Level: beginner 1671 1672 .seealso: MatCreateMPISBAIJ 1673 M*/ 1674 1675 EXTERN_C_BEGIN 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatCreate_MPISBAIJ" 1678 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1679 { 1680 Mat_MPISBAIJ *b; 1681 PetscErrorCode ierr; 1682 PetscTruth flg; 1683 1684 PetscFunctionBegin; 1685 1686 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1687 B->data = (void*)b; 1688 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1689 1690 B->ops->destroy = MatDestroy_MPISBAIJ; 1691 B->ops->view = MatView_MPISBAIJ; 1692 B->mapping = 0; 1693 B->assembled = PETSC_FALSE; 1694 1695 B->insertmode = NOT_SET_VALUES; 1696 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1697 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1698 1699 /* build local table of row and column ownerships */ 1700 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1701 1702 /* build cache for off array entries formed */ 1703 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1704 b->donotstash = PETSC_FALSE; 1705 b->colmap = PETSC_NULL; 1706 b->garray = PETSC_NULL; 1707 b->roworiented = PETSC_TRUE; 1708 1709 /* stuff used in block assembly */ 1710 b->barray = 0; 1711 1712 /* stuff used for matrix vector multiply */ 1713 b->lvec = 0; 1714 b->Mvctx = 0; 1715 b->slvec0 = 0; 1716 b->slvec0b = 0; 1717 b->slvec1 = 0; 1718 b->slvec1a = 0; 1719 b->slvec1b = 0; 1720 b->sMvctx = 0; 1721 1722 /* stuff for MatGetRow() */ 1723 b->rowindices = 0; 1724 b->rowvalues = 0; 1725 b->getrowactive = PETSC_FALSE; 1726 1727 /* hash table stuff */ 1728 b->ht = 0; 1729 b->hd = 0; 1730 b->ht_size = 0; 1731 b->ht_flag = PETSC_FALSE; 1732 b->ht_fact = 0; 1733 b->ht_total_ct = 0; 1734 b->ht_insert_ct = 0; 1735 1736 b->in_loc = 0; 1737 b->v_loc = 0; 1738 b->n_loc = 0; 1739 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1740 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1741 if (flg) { 1742 PetscReal fact = 1.39; 1743 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1744 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1745 if (fact <= 1.0) fact = 1.39; 1746 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1747 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1748 } 1749 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1750 1751 #if defined(PETSC_HAVE_PASTIX) 1752 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1753 "MatGetFactor_mpisbaij_pastix", 1754 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1755 #endif 1756 #if defined(PETSC_HAVE_MUMPS) 1757 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1758 "MatGetFactor_sbaij_mumps", 1759 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1760 #endif 1761 #if defined(PETSC_HAVE_SPOOLES) 1762 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1763 "MatGetFactor_mpisbaij_spooles", 1764 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1765 #endif 1766 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1767 "MatStoreValues_MPISBAIJ", 
1768 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1770 "MatRetrieveValues_MPISBAIJ", 1771 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1773 "MatGetDiagonalBlock_MPISBAIJ", 1774 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1776 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1777 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1778 B->symmetric = PETSC_TRUE; 1779 B->structurally_symmetric = PETSC_TRUE; 1780 B->symmetric_set = PETSC_TRUE; 1781 B->structurally_symmetric_set = PETSC_TRUE; 1782 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 EXTERN_C_END 1786 1787 /*MC 1788 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1789 1790 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1791 and MATMPISBAIJ otherwise. 1792 1793 Options Database Keys: 1794 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1795 1796 Level: beginner 1797 1798 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ 1799 M*/ 1800 1801 EXTERN_C_BEGIN 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatCreate_SBAIJ" 1804 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1805 { 1806 PetscErrorCode ierr; 1807 PetscMPIInt size; 1808 1809 PetscFunctionBegin; 1810 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1811 if (size == 1) { 1812 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1813 } else { 1814 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1815 } 1816 PetscFunctionReturn(0); 1817 } 1818 EXTERN_C_END 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1822 /*@C 1823 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1824 the user should preallocate the matrix storage by setting the parameters 1825 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1826 performance can be increased by more than a factor of 50. 1827 1828 Collective on Mat 1829 1830 Input Parameters: 1831 + A - the matrix 1832 . bs - size of block 1833 . d_nz - number of block nonzeros per block row in diagonal portion of local 1834 submatrix (same for all local rows) 1835 . d_nnz - array containing the number of block nonzeros in the various block rows 1836 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 1837 (possibly different for each block row) or PETSC_NULL. You must leave room 1838 for the diagonal entry even if it is zero. 1839 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1840 submatrix (same for all local rows). 1841 - o_nnz - array containing the number of nonzeros in the various block rows of the 1842 off-diagonal portion of the local submatrix (possibly different for 1843 each block row) or PETSC_NULL. 1844 1845 1846 Options Database Keys: 1847 . -mat_no_unroll - uses code that does not unroll the loops in the 1848 block calculations (much slower) 1849 . -mat_block_size - size of the blocks to use 1850 1851 Notes: 1852 1853 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1854 then it must be used on all processors that share the object for that argument.
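   As a rough illustration only (this sketch is not taken from a PETSc example; the names A, comm and m
   and the nonzero counts are made-up assumptions), a block-tridiagonal matrix with block size 2 could
   be preallocated with constant counts as
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,2,PETSC_NULL,1,PETSC_NULL);
.ve
   where each block row is assumed to couple only to itself and its right neighbour, so at most 2 blocks
   fall in the upper triangle of the diagonal portion and at most 1 block in the off-diagonal portion.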
1855 1856 If the *_nnz parameter is given then the *_nz parameter is ignored. 1857 1858 Storage Information: 1859 For a square global matrix we define each processor's diagonal portion 1860 to be its local rows and the corresponding columns (a square submatrix); 1861 each processor's off-diagonal portion encompasses the remainder of the 1862 local matrix (a rectangular submatrix). 1863 1864 The user can specify preallocated storage for the diagonal part of 1865 the local submatrix with either d_nz or d_nnz (not both). Set 1866 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1867 memory allocation. Likewise, specify preallocated storage for the 1868 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1869 1870 You can call MatGetInfo() to get information on how effective the preallocation was; 1871 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 1872 You can also run with the option -info and look for messages with the string 1873 malloc in them to see if additional memory allocation was needed. 1874 1875 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1876 the figure below we depict these three local rows and all columns (0-11). 1877 1878 .vb 1879 0 1 2 3 4 5 6 7 8 9 10 11 1880 ------------------- 1881 row 3 | o o o d d d o o o o o o 1882 row 4 | o o o d d d o o o o o o 1883 row 5 | o o o d d d o o o o o o 1884 ------------------- 1885 .ve 1886 1887 Thus, any entries in the d locations are stored in the d (diagonal) 1888 submatrix, and any entries in the o locations are stored in the 1889 o (off-diagonal) submatrix. Note that the d matrix is stored in 1890 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 1891 1892 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1893 plus the diagonal part of the d matrix, 1894 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1895 In general, for PDE problems in which most nonzeros are near the diagonal, 1896 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1897 or you will get TERRIBLE performance; see the users' manual chapter on 1898 matrices. 1899 1900 Level: intermediate 1901 1902 .keywords: matrix, block, aij, compressed row, sparse, parallel 1903 1904 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1905 @*/ 1906 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1907 { 1908 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1909 1910 PetscFunctionBegin; 1911 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1912 if (f) { 1913 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatCreateMPISBAIJ" 1920 /*@C 1921 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1922 (block compressed row). For good matrix assembly performance 1923 the user should preallocate the matrix storage by setting the parameters 1924 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1925 performance can be increased by more than a factor of 50. 1926 1927 Collective on MPI_Comm 1928 1929 Input Parameters: 1930 + comm - MPI communicator 1931 . bs - size of block 1932 .
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1933 This value should be the same as the local size used in creating the 1934 y vector for the matrix-vector product y = Ax. 1935 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1936 This value should be the same as the local size used in creating the 1937 x vector for the matrix-vector product y = Ax. 1938 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1939 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1940 . d_nz - number of block nonzeros per block row in diagonal portion of local 1941 submatrix (same for all local rows) 1942 . d_nnz - array containing the number of block nonzeros in the various block rows 1943 in the upper triangular portion of the diagonal portion of the local submatrix 1944 (possibly different for each block row) or PETSC_NULL. 1945 You must leave room for the diagonal entry even if it is zero. 1946 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1947 submatrix (same for all local rows). 1948 - o_nnz - array containing the number of nonzeros in the various block rows of the 1949 off-diagonal portion of the local submatrix (possibly different for 1950 each block row) or PETSC_NULL. 1951 1952 Output Parameter: 1953 . A - the matrix 1954 1955 Options Database Keys: 1956 . -mat_no_unroll - uses code that does not unroll the loops in the 1957 block calculations (much slower) 1958 . -mat_block_size - size of the blocks to use 1959 . -mat_mpi - use the parallel matrix data structures even on one processor 1960 (defaults to using SeqSBAIJ format on one processor) 1961 1962 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1963 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1964 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 1965 1966 Notes: 1967 The number of rows and columns must be divisible by blocksize. 1968 This matrix type does not support Hermitian operations for complex matrices. 1969 1970 The user MUST specify either the local or global matrix dimensions 1971 (possibly both). 1972 1973 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1974 then it must be used on all processors that share the object for that argument. 1975 1976 If the *_nnz parameter is given then the *_nz parameter is ignored. 1977 1978 Storage Information: 1979 For a square global matrix we define each processor's diagonal portion 1980 to be its local rows and the corresponding columns (a square submatrix); 1981 each processor's off-diagonal portion encompasses the remainder of the 1982 local matrix (a rectangular submatrix). 1983 1984 The user can specify preallocated storage for the diagonal part of 1985 the local submatrix with either d_nz or d_nnz (not both). Set 1986 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1987 memory allocation. Likewise, specify preallocated storage for the 1988 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1989 1990 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1991 the figure below we depict these three local rows and all columns (0-11).
1992 1993 .vb 1994 0 1 2 3 4 5 6 7 8 9 10 11 1995 ------------------- 1996 row 3 | o o o d d d o o o o o o 1997 row 4 | o o o d d d o o o o o o 1998 row 5 | o o o d d d o o o o o o 1999 ------------------- 2000 .ve 2001 2002 Thus, any entries in the d locations are stored in the d (diagonal) 2003 submatrix, and any entries in the o locations are stored in the 2004 o (off-diagonal) submatrix. Note that the d matrix is stored in 2005 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2006 2007 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2008 plus the diagonal part of the d matrix, 2009 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2010 In general, for PDE problems in which most nonzeros are near the diagonal, 2011 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2012 or you will get TERRIBLE performance; see the users' manual chapter on 2013 matrices. 2014 2015 Level: intermediate 2016 2017 .keywords: matrix, block, aij, compressed row, sparse, parallel 2018 2019 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2020 @*/ 2021 2022 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2023 { 2024 PetscErrorCode ierr; 2025 PetscMPIInt size; 2026 2027 PetscFunctionBegin; 2028 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2029 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2030 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2031 if (size > 1) { 2032 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2033 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2034 } else { 2035 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2036 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2037 } 2038 PetscFunctionReturn(0); 2039 } 2040 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2044 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2045 { 2046 Mat mat; 2047 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2048 PetscErrorCode ierr; 2049 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2050 PetscScalar *array; 2051 2052 PetscFunctionBegin; 2053 *newmat = 0; 2054 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2055 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2056 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2057 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2058 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2059 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2060 2061 mat->factortype = matin->factortype; 2062 mat->preallocated = PETSC_TRUE; 2063 mat->assembled = PETSC_TRUE; 2064 mat->insertmode = NOT_SET_VALUES; 2065 2066 a = (Mat_MPISBAIJ*)mat->data; 2067 a->bs2 = oldmat->bs2; 2068 a->mbs = oldmat->mbs; 2069 a->nbs = oldmat->nbs; 2070 a->Mbs = oldmat->Mbs; 2071 a->Nbs = oldmat->Nbs; 2072 2073 2074 a->size = oldmat->size; 2075 a->rank = oldmat->rank; 2076 a->donotstash = oldmat->donotstash; 2077 a->roworiented = oldmat->roworiented; 2078 a->rowindices = 0; 2079 a->rowvalues = 0; 2080 a->getrowactive = PETSC_FALSE; 2081 a->barray = 0; 2082 a->rstartbs = oldmat->rstartbs; 2083 a->rendbs = oldmat->rendbs; 2084 a->cstartbs = oldmat->cstartbs; 2085 
a->cendbs = oldmat->cendbs; 2086 2087 /* hash table stuff */ 2088 a->ht = 0; 2089 a->hd = 0; 2090 a->ht_size = 0; 2091 a->ht_flag = oldmat->ht_flag; 2092 a->ht_fact = oldmat->ht_fact; 2093 a->ht_total_ct = 0; 2094 a->ht_insert_ct = 0; 2095 2096 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2097 if (oldmat->colmap) { 2098 #if defined (PETSC_USE_CTABLE) 2099 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2100 #else 2101 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2102 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2103 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2104 #endif 2105 } else a->colmap = 0; 2106 2107 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2108 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2109 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2110 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2111 } else a->garray = 0; 2112 2113 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2114 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2115 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2116 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2117 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2118 2119 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2120 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2121 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2122 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2123 2124 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2125 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2126 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2127 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2128 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2129 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2130 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2131 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2132 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2133 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2134 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2135 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2136 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2137 2138 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2139 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2140 a->sMvctx = oldmat->sMvctx; 2141 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2142 2143 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2144 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2145 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2146 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2147 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2148 *newmat = mat; 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatLoad_MPISBAIJ" 2154 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2155 { 2156 Mat A; 2157 PetscErrorCode ierr; 
2158 PetscInt i,nz,j,rstart,rend; 2159 PetscScalar *vals,*buf; 2160 MPI_Comm comm = ((PetscObject)viewer)->comm; 2161 MPI_Status status; 2162 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2163 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2164 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2165 PetscInt bs=1,Mbs,mbs,extra_rows; 2166 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2167 PetscInt dcount,kmax,k,nzcount,tmp; 2168 int fd; 2169 2170 PetscFunctionBegin; 2171 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2172 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2173 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2174 2175 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2176 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2177 if (!rank) { 2178 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2179 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2180 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2181 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2182 } 2183 2184 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2185 M = header[1]; N = header[2]; 2186 2187 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2188 2189 /* 2190 This code adds extra rows to make sure the number of rows is 2191 divisible by the blocksize 2192 */ 2193 Mbs = M/bs; 2194 extra_rows = bs - M + bs*(Mbs); 2195 if (extra_rows == bs) extra_rows = 0; 2196 else Mbs++; 2197 if (extra_rows &&!rank) { 2198 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2199 } 2200 2201 /* determine ownership of all rows */ 2202 mbs = Mbs/size + ((Mbs % size) > rank); 2203 m = mbs*bs; 2204 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2205 mmbs = PetscMPIIntCast(mbs); 2206 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2207 rowners[0] = 0; 2208 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2209 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2210 rstart = rowners[rank]; 2211 rend = rowners[rank+1]; 2212 2213 /* distribute row lengths to all processors */ 2214 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2215 if (!rank) { 2216 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2217 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2218 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2219 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2220 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2221 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2222 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2223 } else { 2224 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2225 } 2226 2227 if (!rank) { /* procs[0] */ 2228 /* calculate the number of nonzeros on each processor */ 2229 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2230 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2231 for (i=0; i<size; i++) { 2232 for 
(j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2233 procsnz[i] += rowlengths[j]; 2234 } 2235 } 2236 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2237 2238 /* determine max buffer needed and allocate it */ 2239 maxnz = 0; 2240 for (i=0; i<size; i++) { 2241 maxnz = PetscMax(maxnz,procsnz[i]); 2242 } 2243 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2244 2245 /* read in my part of the matrix column indices */ 2246 nz = procsnz[0]; 2247 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2248 mycols = ibuf; 2249 if (size == 1) nz -= extra_rows; 2250 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2251 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2252 2253 /* read in every ones (except the last) and ship off */ 2254 for (i=1; i<size-1; i++) { 2255 nz = procsnz[i]; 2256 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2257 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2258 } 2259 /* read in the stuff for the last proc */ 2260 if (size != 1) { 2261 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2262 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2263 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2264 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2265 } 2266 ierr = PetscFree(cols);CHKERRQ(ierr); 2267 } else { /* procs[i], i>0 */ 2268 /* determine buffer space needed for message */ 2269 nz = 0; 2270 for (i=0; i<m; i++) { 2271 nz += locrowlens[i]; 2272 } 2273 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2274 mycols = ibuf; 2275 /* receive message of column indices*/ 2276 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2277 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2278 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2279 } 2280 2281 /* loop over local rows, determining number of off diagonal entries */ 2282 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2283 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2284 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2285 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2286 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2287 rowcount = 0; 2288 nzcount = 0; 2289 for (i=0; i<mbs; i++) { 2290 dcount = 0; 2291 odcount = 0; 2292 for (j=0; j<bs; j++) { 2293 kmax = locrowlens[rowcount]; 2294 for (k=0; k<kmax; k++) { 2295 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2296 if (!mask[tmp]) { 2297 mask[tmp] = 1; 2298 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2299 else masked1[dcount++] = tmp; /* entry in diag portion */ 2300 } 2301 } 2302 rowcount++; 2303 } 2304 2305 dlens[i] = dcount; /* d_nzz[i] */ 2306 odlens[i] = odcount; /* o_nzz[i] */ 2307 2308 /* zero out the mask elements we set */ 2309 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2310 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2311 } 2312 2313 /* create our matrix */ 2314 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2315 ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2316 ierr = MatSetType(A,type);CHKERRQ(ierr); 2317 ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2318 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2319 2320 if (!rank) { 2321 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2322 /* read in my part of the matrix numerical values */ 2323 nz = procsnz[0]; 2324 vals = buf; 2325 mycols = ibuf; 2326 if (size == 1) nz -= extra_rows; 2327 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2328 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2329 2330 /* insert into matrix */ 2331 jj = rstart*bs; 2332 for (i=0; i<m; i++) { 2333 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2334 mycols += locrowlens[i]; 2335 vals += locrowlens[i]; 2336 jj++; 2337 } 2338 2339 /* read in other processors (except the last one) and ship out */ 2340 for (i=1; i<size-1; i++) { 2341 nz = procsnz[i]; 2342 vals = buf; 2343 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2344 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2345 } 2346 /* the last proc */ 2347 if (size != 1){ 2348 nz = procsnz[i] - extra_rows; 2349 vals = buf; 2350 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2351 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2352 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2353 } 2354 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2355 2356 } else { 2357 /* receive numeric values */ 2358 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2359 2360 /* receive message of values*/ 2361 vals = buf; 2362 mycols = ibuf; 2363 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2364 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2365 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2366 2367 /* insert into matrix */ 2368 jj = rstart*bs; 2369 for (i=0; i<m; i++) { 2370 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2371 mycols += locrowlens[i]; 2372 vals += locrowlens[i]; 2373 jj++; 2374 } 2375 } 2376 2377 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2378 ierr = PetscFree(buf);CHKERRQ(ierr); 2379 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2380 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2381 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2382 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2383 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2384 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2385 *newmat = A; 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatLoadnew_MPISBAIJ" 2391 PetscErrorCode MatLoadnew_MPISBAIJ(PetscViewer viewer, Mat newmat) 2392 { 
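  /*
     Overview of the load: process 0 reads the PETSc binary header, the row lengths, the
     column indices and the numerical values from the viewer and ships each process its
     portion; every process then scans its column indices once to count diagonal versus
     off-diagonal block columns (dlens/odlens) and uses those counts to preallocate before
     inserting the values. If the global size is not divisible by the block size, extra
     padding rows with a unit diagonal entry are appended.
  */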
2393 PetscErrorCode ierr; 2394 PetscInt i,nz,j,rstart,rend; 2395 PetscScalar *vals,*buf; 2396 MPI_Comm comm = ((PetscObject)viewer)->comm; 2397 MPI_Status status; 2398 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2399 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2400 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2401 PetscInt bs=1,Mbs,mbs,extra_rows; 2402 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2403 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2404 int fd; 2405 2406 PetscFunctionBegin; 2407 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2408 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2409 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2410 2411 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2412 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2413 if (!rank) { 2414 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2415 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2416 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2417 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2418 } 2419 2420 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2421 2422 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2423 M = header[1]; N = header[2]; 2424 2425 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2426 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2427 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2428 2429 /* If global sizes are set, check if they are consistent with that given in the file */ 2430 if (sizesset) { 2431 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2432 } 2433 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 2434 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2435 2436 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2437 2438 /* 2439 This code adds extra rows to make sure the number of rows is 2440 divisible by the blocksize 2441 */ 2442 Mbs = M/bs; 2443 extra_rows = bs - M + bs*(Mbs); 2444 if (extra_rows == bs) extra_rows = 0; 2445 else Mbs++; 2446 if (extra_rows && !rank) { 2447 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2448 } 2449 2450 /* determine ownership of all rows */ 2451 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2452 mbs = Mbs/size + ((Mbs % size) > rank); 2453 m = mbs*bs; 2454 } else { /* User Set */ 2455 m = newmat->rmap->n; 2456 mbs = m/bs; 2457 } 2458 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2459 mmbs = PetscMPIIntCast(mbs); 2460 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2461 rowners[0] = 0; 2462 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2463 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2464 rstart = rowners[rank]; 2465 rend = rowners[rank+1]; 2466 2467 /* distribute row lengths
to all processors */ 2468 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2469 if (!rank) { 2470 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2471 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2472 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2473 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2474 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2475 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2476 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2477 } else { 2478 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2479 } 2480 2481 if (!rank) { /* procs[0] */ 2482 /* calculate the number of nonzeros on each processor */ 2483 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2484 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2485 for (i=0; i<size; i++) { 2486 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2487 procsnz[i] += rowlengths[j]; 2488 } 2489 } 2490 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2491 2492 /* determine max buffer needed and allocate it */ 2493 maxnz = 0; 2494 for (i=0; i<size; i++) { 2495 maxnz = PetscMax(maxnz,procsnz[i]); 2496 } 2497 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2498 2499 /* read in my part of the matrix column indices */ 2500 nz = procsnz[0]; 2501 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2502 mycols = ibuf; 2503 if (size == 1) nz -= extra_rows; 2504 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2505 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2506 2507 /* read in every ones (except the last) and ship off */ 2508 for (i=1; i<size-1; i++) { 2509 nz = procsnz[i]; 2510 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2511 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2512 } 2513 /* read in the stuff for the last proc */ 2514 if (size != 1) { 2515 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2516 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2517 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2518 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2519 } 2520 ierr = PetscFree(cols);CHKERRQ(ierr); 2521 } else { /* procs[i], i>0 */ 2522 /* determine buffer space needed for message */ 2523 nz = 0; 2524 for (i=0; i<m; i++) { 2525 nz += locrowlens[i]; 2526 } 2527 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2528 mycols = ibuf; 2529 /* receive message of column indices*/ 2530 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2531 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2532 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2533 } 2534 2535 /* loop over local rows, determining number of off diagonal entries */ 2536 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2537 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2538 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2539 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2540 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2541 rowcount = 0; 2542 nzcount = 0; 2543 for (i=0; i<mbs; i++) { 2544 dcount = 0; 2545 
odcount = 0; 2546 for (j=0; j<bs; j++) { 2547 kmax = locrowlens[rowcount]; 2548 for (k=0; k<kmax; k++) { 2549 tmp = mycols[nzcount++]/bs; /* block col. index */ 2550 if (!mask[tmp]) { 2551 mask[tmp] = 1; 2552 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2553 else masked1[dcount++] = tmp; /* entry in diag portion */ 2554 } 2555 } 2556 rowcount++; 2557 } 2558 2559 dlens[i] = dcount; /* d_nzz[i] */ 2560 odlens[i] = odcount; /* o_nzz[i] */ 2561 2562 /* zero out the mask elements we set */ 2563 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2564 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2565 } 2566 if (!sizesset) { 2567 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2568 } 2569 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2570 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2571 2572 if (!rank) { 2573 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2574 /* read in my part of the matrix numerical values */ 2575 nz = procsnz[0]; 2576 vals = buf; 2577 mycols = ibuf; 2578 if (size == 1) nz -= extra_rows; 2579 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2580 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2581 2582 /* insert into matrix */ 2583 jj = rstart*bs; 2584 for (i=0; i<m; i++) { 2585 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2586 mycols += locrowlens[i]; 2587 vals += locrowlens[i]; 2588 jj++; 2589 } 2590 2591 /* read in other processors (except the last one) and ship out */ 2592 for (i=1; i<size-1; i++) { 2593 nz = procsnz[i]; 2594 vals = buf; 2595 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2596 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2597 } 2598 /* the last proc */ 2599 if (size != 1){ 2600 nz = procsnz[i] - extra_rows; 2601 vals = buf; 2602 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2603 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2604 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2605 } 2606 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2607 2608 } else { 2609 /* receive numeric values */ 2610 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2611 2612 /* receive message of values*/ 2613 vals = buf; 2614 mycols = ibuf; 2615 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2616 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2617 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2618 2619 /* insert into matrix */ 2620 jj = rstart*bs; 2621 for (i=0; i<m; i++) { 2622 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2623 mycols += locrowlens[i]; 2624 vals += locrowlens[i]; 2625 jj++; 2626 } 2627 } 2628 2629 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2630 ierr = PetscFree(buf);CHKERRQ(ierr); 2631 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2632 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2633 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2634 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2635 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2636 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 #undef __FUNCT__ 2641 #define __FUNCT__ 
"MatMPISBAIJSetHashTableFactor" 2642 /*XXXXX@ 2643 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2644 2645 Input Parameters: 2646 . mat - the matrix 2647 . fact - factor 2648 2649 Collective on Mat 2650 2651 Level: advanced 2652 2653 Notes: 2654 This can also be set by the command line option: -mat_use_hash_table fact 2655 2656 .keywords: matrix, hashtable, factor, HT 2657 2658 .seealso: MatSetOption() 2659 @XXXXX*/ 2660 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2664 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2665 { 2666 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2667 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2668 PetscReal atmp; 2669 PetscReal *work,*svalues,*rvalues; 2670 PetscErrorCode ierr; 2671 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2672 PetscMPIInt rank,size; 2673 PetscInt *rowners_bs,dest,count,source; 2674 PetscScalar *va; 2675 MatScalar *ba; 2676 MPI_Status stat; 2677 2678 PetscFunctionBegin; 2679 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2680 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2681 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2682 2683 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2684 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2685 2686 bs = A->rmap->bs; 2687 mbs = a->mbs; 2688 Mbs = a->Mbs; 2689 ba = b->a; 2690 bi = b->i; 2691 bj = b->j; 2692 2693 /* find ownerships */ 2694 rowners_bs = A->rmap->range; 2695 2696 /* each proc creates an array to be distributed */ 2697 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2698 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2699 2700 /* row_max for B */ 2701 if (rank != size-1){ 2702 for (i=0; i<mbs; i++) { 2703 ncols = bi[1] - bi[0]; bi++; 2704 brow = bs*i; 2705 for (j=0; j<ncols; j++){ 2706 bcol = bs*(*bj); 2707 for (kcol=0; kcol<bs; kcol++){ 2708 col = bcol + kcol; /* local col index */ 2709 col += rowners_bs[rank+1]; /* global col index */ 2710 for (krow=0; krow<bs; krow++){ 2711 atmp = PetscAbsScalar(*ba); ba++; 2712 row = brow + krow; /* local row index */ 2713 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2714 if (work[col] < atmp) work[col] = atmp; 2715 } 2716 } 2717 bj++; 2718 } 2719 } 2720 2721 /* send values to its owners */ 2722 for (dest=rank+1; dest<size; dest++){ 2723 svalues = work + rowners_bs[dest]; 2724 count = rowners_bs[dest+1]-rowners_bs[dest]; 2725 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2726 } 2727 } 2728 2729 /* receive values */ 2730 if (rank){ 2731 rvalues = work; 2732 count = rowners_bs[rank+1]-rowners_bs[rank]; 2733 for (source=0; source<rank; source++){ 2734 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2735 /* process values */ 2736 for (i=0; i<count; i++){ 2737 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2738 } 2739 } 2740 } 2741 2742 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2743 ierr = PetscFree(work);CHKERRQ(ierr); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatSOR_MPISBAIJ" 2749 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2750 { 2751 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2752 PetscErrorCode ierr; 2753 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2754 
PetscScalar *x,*ptr,*from; 2755 Vec bb1; 2756 const PetscScalar *b; 2757 2758 PetscFunctionBegin; 2759 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2760 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2761 2762 if (flag == SOR_APPLY_UPPER) { 2763 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2768 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2769 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2770 its--; 2771 } 2772 2773 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2774 while (its--){ 2775 2776 /* lower triangular part: slvec0b = - B^T*xx */ 2777 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2778 2779 /* copy xx into slvec0a */ 2780 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2781 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2782 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2783 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2784 2785 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2786 2787 /* copy bb into slvec1a */ 2788 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2789 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2790 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2791 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2792 2793 /* set slvec1b = 0 */ 2794 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2795 2796 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2797 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2798 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2799 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2800 2801 /* upper triangular part: bb1 = bb1 - B*x */ 2802 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2803 2804 /* local diagonal sweep */ 2805 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2806 } 2807 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2808 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2809 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2810 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2811 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2812 } else if (flag & SOR_EISENSTAT) { 2813 Vec xx1; 2814 PetscTruth hasop; 2815 const PetscScalar *diag; 2816 PetscScalar *sl,scale = (omega - 2.0)/omega; 2817 PetscInt i,n; 2818 2819 if (!mat->xx1) { 2820 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2821 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2822 } 2823 xx1 = mat->xx1; 2824 bb1 = mat->bb1; 2825 2826 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2827 2828 if (!mat->diag) { 2829 /* this is wrong for same matrix with new nonzero values */ 2830 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2831 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2832 } 2833 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2834 2835 if (hasop) { 2836 ierr = 
MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2837 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2838 } else { 2839 /* 2840 These two lines are replaced by code that may be a bit faster for a good compiler 2841 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2842 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2843 */ 2844 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2845 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2846 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2847 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2848 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2849 if (omega == 1.0) { 2850 for (i=0; i<n; i++) { 2851 sl[i] = b[i] - diag[i]*x[i]; 2852 } 2853 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2854 } else { 2855 for (i=0; i<n; i++) { 2856 sl[i] = b[i] + scale*diag[i]*x[i]; 2857 } 2858 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2859 } 2860 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2861 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2862 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2863 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2864 } 2865 2866 /* multiply off-diagonal portion of matrix */ 2867 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2868 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2869 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2870 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2871 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2872 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2873 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2874 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2875 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2876 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2877 2878 /* local sweep */ 2879 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2880 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2881 } else { 2882 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2883 } 2884 PetscFunctionReturn(0); 2885 } 2886 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2889 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2890 { 2891 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2892 PetscErrorCode ierr; 2893 Vec lvec1,bb1; 2894 2895 PetscFunctionBegin; 2896 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2897 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2898 2899 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2900 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2901 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2902 its--; 2903 } 2904 2905 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2906 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2907 while (its--){ 2908 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2909 2910 /* lower diagonal part: bb1 = bb - B^T*xx */ 2911 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2912 ierr = 
VecScale(lvec1,-1.0);CHKERRQ(ierr); 2913 2914 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2915 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2916 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2917 2918 /* upper diagonal part: bb1 = bb1 - B*x */ 2919 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2920 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2921 2922 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2923 2924 /* diagonal sweep */ 2925 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2926 } 2927 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2928 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2929 } else { 2930 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2931 } 2932 PetscFunctionReturn(0); 2933 } 2934 2935