1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscblaslapack.h" 7 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 EXTERN_C_BEGIN 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 28 { 29 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 34 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN_C_BEGIN 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 43 { 44 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 49 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 EXTERN_C_END 53 54 55 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 56 { \ 57 \ 58 brow = row/bs; \ 59 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 60 rmax = aimax[brow]; nrow = ailen[brow]; \ 61 bcol = col/bs; \ 62 ridx = row % bs; cidx = col % bs; \ 63 low = 0; high = nrow; \ 64 while (high-low > 3) { \ 65 t = (low+high)/2; \ 66 if (rp[t] > bcol) high = t; \ 67 else low = t; \ 68 } \ 69 for (_i=low; _i<high; _i++) { \ 70 if (rp[_i] > bcol) break; \ 71 if (rp[_i] == bcol) { \ 72 bap = ap + bs2*_i + bs*cidx + ridx; \ 73 if (addv == ADD_VALUES) *bap += value; \ 74 else *bap = value; \ 75 goto a_noinsert; \ 76 } \ 77 } \ 78 if (a->nonew == 1) goto a_noinsert; \ 79 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 80 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 81 N = nrow++ - 1; \ 82 /* shift up all the later entries in this row */ \ 83 for (ii=N; ii>=_i; ii--) { \ 84 rp[ii+1] = rp[ii]; \ 85 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 86 } \ 87 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 88 rp[_i] = bcol; \ 89 ap[bs2*_i + bs*cidx + ridx] = value; \ 90 a_noinsert:; \ 91 ailen[brow] = nrow; \ 92 } 93 94 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 95 { \ 96 brow = row/bs; \ 97 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 98 rmax = bimax[brow]; nrow = bilen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto b_noinsert; \ 114 } \ 115 } \ 116 if (b->nonew == 1) goto b_noinsert; \ 117 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 118 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 119 N = nrow++ - 1; \ 120 /* shift up all the later entries in this row */ \ 121 for (ii=N; ii>=_i; ii--) { \ 122 rp[ii+1] = rp[ii]; \ 123 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 124 } \ 125 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 b_noinsert:; \ 129 bilen[brow] = nrow; \ 130 } 131 132 /* Only add/insert a(i,j) with i<=j (blocks). 133 Any a(i,j) with i>j input by user is ingored. 134 */ 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSetValues_MPISBAIJ" 137 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 138 { 139 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 140 MatScalar value; 141 PetscTruth roworiented = baij->roworiented; 142 PetscErrorCode ierr; 143 PetscInt i,j,row,col; 144 PetscInt rstart_orig=mat->rmap->rstart; 145 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 146 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 147 148 /* Some Variables required in the macro */ 149 Mat A = baij->A; 150 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 151 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 152 MatScalar *aa=a->a; 153 154 Mat B = baij->B; 155 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 156 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 157 MatScalar *ba=b->a; 158 159 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 160 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 161 MatScalar *ap,*bap; 162 163 /* for stash */ 164 PetscInt n_loc, *in_loc = PETSC_NULL; 165 MatScalar *v_loc = PETSC_NULL; 166 167 PetscFunctionBegin; 168 if (v) PetscValidScalarPointer(v,6); 169 if (!baij->donotstash){ 170 if (n > baij->n_loc) { 171 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 174 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 175 baij->n_loc = n; 176 } 177 in_loc = baij->in_loc; 178 v_loc = baij->v_loc; 179 } 180 181 for (i=0; i<m; i++) { 182 if (im[i] < 0) continue; 183 #if defined(PETSC_USE_DEBUG) 184 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 185 #endif 186 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 187 row = im[i] - rstart_orig; /* local row index */ 188 for (j=0; j<n; j++) { 189 if (im[i]/bs > in[j]/bs){ 190 if (a->ignore_ltriangular){ 191 continue; /* ignore lower triangular blocks */ 192 } else { 193 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 194 } 195 } 196 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 197 col = in[j] - cstart_orig; /* local col index */ 198 brow = row/bs; bcol = col/bs; 199 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 200 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } 232 } 233 } else { /* off processor entry */ 234 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 235 if (!baij->donotstash) { 236 n_loc = 0; 237 for (j=0; j<n; j++){ 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray=baij->barray; 261 PetscTruth roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if(!barray) { 269 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (im[i] > in[j]) { 287 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 288 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 289 } 290 /* If NumCol = 1 then a copy is not required */ 291 if ((roworiented) && (n == 1)) { 292 barray = (MatScalar*) v + i*bs2; 293 } else if((!roworiented) && (m == 1)) { 294 barray = (MatScalar*) v + j*bs2; 295 } else { /* Here a copy is required */ 296 if (roworiented) { 297 value = v + i*(stepval+bs)*bs + j*bs; 298 } else { 299 value = v + j*(stepval+bs)*bs + i*bs; 300 } 301 for (ii=0; ii<bs; ii++,value+=stepval) { 302 for (jj=0; jj<bs; jj++) { 303 *barray++ = *value++; 304 } 305 } 306 barray -=bs2; 307 } 308 309 if (in[j] >= cstart && in[j] < cend){ 310 col = in[j] - cstart; 311 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 312 } 313 else if (in[j] < 0) continue; 314 #if defined(PETSC_USE_DEBUG) 315 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 316 #endif 317 else { 318 if (mat->was_assembled) { 319 if (!baij->colmap) { 320 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 321 } 322 323 #if defined(PETSC_USE_DEBUG) 324 #if defined (PETSC_USE_CTABLE) 325 { PetscInt data; 326 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 327 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 328 } 329 #else 330 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 331 #endif 332 #endif 333 #if defined (PETSC_USE_CTABLE) 334 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 335 col = (col - 1)/bs; 336 #else 337 col = (baij->colmap[in[j]] - 1)/bs; 338 #endif 339 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 340 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 341 col = in[j]; 342 } 343 } 344 else col = in[j]; 345 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 346 } 347 } 348 } else { 349 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 350 if (!baij->donotstash) { 351 if (roworiented) { 352 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } else { 354 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 355 } 356 } 357 } 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatGetValues_MPISBAIJ" 364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 365 { 366 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 367 PetscErrorCode ierr; 368 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 369 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 370 371 PetscFunctionBegin; 372 for (i=0; i<m; i++) { 373 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 374 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 375 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 376 row = idxm[i] - bsrstart; 377 for (j=0; j<n; j++) { 378 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 379 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 380 if (idxn[j] >= bscstart && idxn[j] < bscend){ 381 col = idxn[j] - bscstart; 382 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 383 } else { 384 if (!baij->colmap) { 385 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 386 } 387 #if defined (PETSC_USE_CTABLE) 388 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 389 data --; 390 #else 391 data = baij->colmap[idxn[j]/bs]-1; 392 #endif 393 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 394 else { 395 col = data + idxn[j]%bs; 396 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 397 } 398 } 399 } 400 } else { 401 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 402 } 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatNorm_MPISBAIJ" 409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 410 { 411 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscReal sum[2],*lnorm2; 414 415 PetscFunctionBegin; 416 if (baij->size == 1) { 417 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 418 } else { 419 if (type == NORM_FROBENIUS) { 420 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 421 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 422 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 423 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 424 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 425 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 426 *norm = sqrt(sum[0] + 2*sum[1]); 427 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 428 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 429 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 430 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 431 PetscReal *rsum,*rsum2,vabs; 432 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 433 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 434 MatScalar *v; 435 436 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 437 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 438 /* Amat */ 439 v = amat->a; jj = amat->j; 440 for (brow=0; brow<mbs; brow++) { 441 grow = bs*(rstart + brow); 442 nz = amat->i[brow+1] - amat->i[brow]; 443 for (bcol=0; bcol<nz; bcol++){ 444 gcol = bs*(rstart + *jj); jj++; 445 for (col=0; col<bs; col++){ 446 for (row=0; row<bs; row++){ 447 vabs = PetscAbsScalar(*v); v++; 448 rsum[gcol+col] += vabs; 449 /* non-diagonal block */ 450 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 451 } 452 } 453 } 454 } 455 /* Bmat */ 456 v = bmat->a; jj = bmat->j; 457 for (brow=0; brow<mbs; brow++) { 458 grow = bs*(rstart + brow); 459 nz = bmat->i[brow+1] - bmat->i[brow]; 460 for (bcol=0; bcol<nz; bcol++){ 461 gcol = bs*garray[*jj]; jj++; 462 for (col=0; col<bs; col++){ 463 for (row=0; row<bs; row++){ 464 vabs = PetscAbsScalar(*v); v++; 465 rsum[gcol+col] += vabs; 466 rsum[grow+row] += vabs; 467 } 468 } 469 } 470 } 471 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 472 *norm = 0.0; 473 for (col=0; col<mat->cmap->N; col++) { 474 if (rsum2[col] > *norm) *norm = rsum2[col]; 475 } 476 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 477 } else { 478 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 479 } 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 487 { 488 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 489 PetscErrorCode ierr; 490 PetscInt nstash,reallocs; 491 InsertMode addv; 492 493 PetscFunctionBegin; 494 if (baij->donotstash || mat->nooffprocentries) { 495 PetscFunctionReturn(0); 496 } 497 498 /* make sure all processors are either in INSERTMODE or ADDMODE */ 499 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 500 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 501 mat->insertmode = addv; /* in case this processor had no cache */ 502 503 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 504 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 508 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 515 { 516 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 517 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 518 PetscErrorCode ierr; 519 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 520 PetscInt *row,*col; 521 PetscTruth other_disassembled; 522 PetscMPIInt n; 523 PetscTruth r1,r2,r3; 524 MatScalar *val; 525 InsertMode addv = mat->insertmode; 526 527 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 528 PetscFunctionBegin; 529 530 if (!baij->donotstash && !mat->nooffprocentries) { 531 while (1) { 532 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 533 if (!flg) break; 534 535 for (i=0; i<n;) { 536 /* Now identify the consecutive vals belonging to the same row */ 537 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 538 if (j < n) ncols = j-i; 539 else ncols = n-i; 540 /* Now assemble all these values with a single function call */ 541 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 542 i = j; 543 } 544 } 545 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 546 /* Now process the block-stash. Since the values are stashed column-oriented, 547 set the roworiented flag to column oriented, and after MatSetValues() 548 restore the original flags */ 549 r1 = baij->roworiented; 550 r2 = a->roworiented; 551 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 552 baij->roworiented = PETSC_FALSE; 553 a->roworiented = PETSC_FALSE; 554 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 555 while (1) { 556 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 557 if (!flg) break; 558 559 for (i=0; i<n;) { 560 /* Now identify the consecutive vals belonging to the same row */ 561 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 562 if (j < n) ncols = j-i; 563 else ncols = n-i; 564 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 565 i = j; 566 } 567 } 568 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 569 baij->roworiented = r1; 570 a->roworiented = r2; 571 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 572 } 573 574 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 575 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 576 577 /* determine if any processor has disassembled, if so we must 578 also disassemble ourselfs, in order that we may reassemble. */ 579 /* 580 if nonzero structure of submatrix B cannot change then we know that 581 no processor disassembled thus we can skip this stuff 582 */ 583 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 584 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 585 if (mat->was_assembled && !other_disassembled) { 586 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 587 } 588 } 589 590 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 591 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 592 } 593 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 594 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 595 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 596 597 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 598 baij->rowvalues = 0; 599 600 PetscFunctionReturn(0); 601 } 602 603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 607 { 608 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 609 PetscErrorCode ierr; 610 PetscInt bs = mat->rmap->bs; 611 PetscMPIInt size = baij->size,rank = baij->rank; 612 PetscTruth iascii,isdraw; 613 PetscViewer sviewer; 614 PetscViewerFormat format; 615 616 PetscFunctionBegin; 617 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 618 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 619 if (iascii) { 620 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 621 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 622 MatInfo info; 623 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 624 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 626 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 627 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 629 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 630 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 631 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscTruth isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 671 672 /* copy over the A part */ 673 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 675 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 676 677 for (i=0; i<mbs; i++) { 678 rvals[0] = bs*(baij->rstartbs + i); 679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 680 for (j=ai[i]; j<ai[i+1]; j++) { 681 col = (baij->cstartbs+aj[j])*bs; 682 for (k=0; k<bs; k++) { 683 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 684 col++; a += bs; 685 } 686 } 687 } 688 /* copy over the B part */ 689 Bloc = (Mat_SeqBAIJ*)baij->B->data; 690 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 691 for (i=0; i<mbs; i++) { 692 693 rvals[0] = bs*(baij->rstartbs + i); 694 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 695 for (j=ai[i]; j<ai[i+1]; j++) { 696 col = baij->garray[aj[j]]*bs; 697 for (k=0; k<bs; k++) { 698 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 699 col++; a += bs; 700 } 701 } 702 } 703 ierr = PetscFree(rvals);CHKERRQ(ierr); 704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 /* 707 Everyone has to call to draw the matrix since the graphics waits are 708 synchronized across all processors that share the PetscDraw object 709 */ 710 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 713 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 714 } 715 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 716 ierr = MatDestroy(A);CHKERRQ(ierr); 717 } 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNCT__ 722 #define __FUNCT__ "MatView_MPISBAIJ" 723 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 724 { 725 PetscErrorCode ierr; 726 PetscTruth iascii,isdraw,issocket,isbinary; 727 728 PetscFunctionBegin; 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 730 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 733 if (iascii || isdraw || issocket || isbinary) { 734 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 735 } else { 736 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 737 } 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatDestroy_MPISBAIJ" 743 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 744 { 745 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 746 PetscErrorCode ierr; 747 748 PetscFunctionBegin; 749 #if defined(PETSC_USE_LOG) 750 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 751 #endif 752 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 753 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 754 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 755 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 756 #if defined (PETSC_USE_CTABLE) 757 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 758 #else 759 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 760 #endif 761 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 762 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 763 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 764 if (baij->slvec0) { 765 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 766 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 767 } 768 if (baij->slvec1) { 769 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 770 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 771 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 772 } 773 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 774 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 775 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 776 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 777 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 778 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 779 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 780 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 781 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 782 #endif 783 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 785 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 786 ierr = PetscFree(baij);CHKERRQ(ierr); 787 788 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 798 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 799 { 800 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 801 PetscErrorCode ierr; 802 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 803 PetscScalar *x,*from; 804 805 PetscFunctionBegin; 806 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 807 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 808 809 /* diagonal part */ 810 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 811 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 812 813 /* subdiagonal part */ 814 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 815 816 /* copy x into the vec slvec0 */ 817 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 820 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 821 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 822 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 823 824 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 /* supperdiagonal part */ 827 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatMult_MPISBAIJ" 833 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 834 { 835 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 836 PetscErrorCode ierr; 837 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 838 PetscScalar *x,*from; 839 840 PetscFunctionBegin; 841 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 842 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 843 844 /* diagonal part */ 845 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 846 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 847 848 /* subdiagonal part */ 849 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 850 851 /* copy x into the vec slvec0 */ 852 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 853 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 854 855 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 856 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 857 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858 859 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 /* supperdiagonal part */ 862 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 868 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 869 { 870 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 871 PetscErrorCode ierr; 872 PetscInt nt; 873 874 PetscFunctionBegin; 875 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 876 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 877 878 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 879 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 880 881 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 /* do diagonal part */ 883 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 884 /* do supperdiagonal part */ 885 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 887 /* do subdiagonal part */ 888 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 889 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 891 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 897 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 898 { 899 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 900 PetscErrorCode ierr; 901 PetscInt mbs=a->mbs,bs=A->rmap->bs; 902 PetscScalar *x,*from,zero=0.0; 903 904 PetscFunctionBegin; 905 /* 906 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 907 PetscSynchronizedFlush(((PetscObject)A)->comm); 908 */ 909 /* diagonal part */ 910 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 911 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 912 913 /* subdiagonal part */ 914 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 915 916 /* copy x into the vec slvec0 */ 917 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 920 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 921 922 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 924 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925 926 /* supperdiagonal part */ 927 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 928 929 PetscFunctionReturn(0); 930 } 931 932 #undef __FUNCT__ 933 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 934 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 935 { 936 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 937 PetscErrorCode ierr; 938 939 PetscFunctionBegin; 940 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 /* do diagonal part */ 942 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 943 /* do supperdiagonal part */ 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 946 947 /* do subdiagonal part */ 948 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 949 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 951 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 This only works correctly for square matrices where the subblock A->A is the 957 diagonal block 958 */ 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 961 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 962 { 963 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 964 PetscErrorCode ierr; 965 966 PetscFunctionBegin; 967 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 968 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatScale_MPISBAIJ" 974 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 975 { 976 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 977 PetscErrorCode ierr; 978 979 PetscFunctionBegin; 980 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 981 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatGetRow_MPISBAIJ" 987 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 988 { 989 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 990 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 991 PetscErrorCode ierr; 992 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 993 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 994 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 995 996 PetscFunctionBegin; 997 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 998 mat->getrowactive = PETSC_TRUE; 999 1000 if (!mat->rowvalues && (idx || v)) { 1001 /* 1002 allocate enough space to hold information from the longest row. 1003 */ 1004 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1005 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1006 PetscInt max = 1,mbs = mat->mbs,tmp; 1007 for (i=0; i<mbs; i++) { 1008 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1009 if (max < tmp) { max = tmp; } 1010 } 1011 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1012 } 1013 1014 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1015 lrow = row - brstart; /* local row index */ 1016 1017 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1018 if (!v) {pvA = 0; pvB = 0;} 1019 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1020 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1021 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1022 nztot = nzA + nzB; 1023 1024 cmap = mat->garray; 1025 if (v || idx) { 1026 if (nztot) { 1027 /* Sort by increasing column numbers, assuming A and B already sorted */ 1028 PetscInt imark = -1; 1029 if (v) { 1030 *v = v_p = mat->rowvalues; 1031 for (i=0; i<nzB; i++) { 1032 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1033 else break; 1034 } 1035 imark = i; 1036 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1037 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1038 } 1039 if (idx) { 1040 *idx = idx_p = mat->rowindices; 1041 if (imark > -1) { 1042 for (i=0; i<imark; i++) { 1043 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1044 } 1045 } else { 1046 for (i=0; i<nzB; i++) { 1047 if (cmap[cworkB[i]/bs] < cstart) 1048 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1049 else break; 1050 } 1051 imark = i; 1052 } 1053 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1054 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1055 } 1056 } else { 1057 if (idx) *idx = 0; 1058 if (v) *v = 0; 1059 } 1060 } 1061 *nz = nztot; 1062 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1063 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1069 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1070 { 1071 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1072 1073 PetscFunctionBegin; 1074 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1075 baij->getrowactive = PETSC_FALSE; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1081 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1082 { 1083 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1084 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1085 1086 PetscFunctionBegin; 1087 aA->getrow_utriangular = PETSC_TRUE; 1088 PetscFunctionReturn(0); 1089 } 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1092 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1093 { 1094 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1095 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1096 1097 PetscFunctionBegin; 1098 aA->getrow_utriangular = PETSC_FALSE; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1104 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1105 { 1106 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1111 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1117 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1118 { 1119 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1124 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1130 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1131 { 1132 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1137 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1143 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1144 { 1145 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1146 Mat A = a->A,B = a->B; 1147 PetscErrorCode ierr; 1148 PetscReal isend[5],irecv[5]; 1149 1150 PetscFunctionBegin; 1151 info->block_size = (PetscReal)matin->rmap->bs; 1152 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1153 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1154 isend[3] = info->memory; isend[4] = info->mallocs; 1155 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1156 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1157 isend[3] += info->memory; isend[4] += info->mallocs; 1158 if (flag == MAT_LOCAL) { 1159 info->nz_used = isend[0]; 1160 info->nz_allocated = isend[1]; 1161 info->nz_unneeded = isend[2]; 1162 info->memory = isend[3]; 1163 info->mallocs = isend[4]; 1164 } else if (flag == MAT_GLOBAL_MAX) { 1165 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1166 info->nz_used = irecv[0]; 1167 info->nz_allocated = irecv[1]; 1168 info->nz_unneeded = irecv[2]; 1169 info->memory = irecv[3]; 1170 info->mallocs = irecv[4]; 1171 } else if (flag == MAT_GLOBAL_SUM) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else { 1179 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1180 } 1181 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1182 info->fill_ratio_needed = 0; 1183 info->factor_mallocs = 0; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1189 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg) 1190 { 1191 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1192 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 switch (op) { 1197 case MAT_NEW_NONZERO_LOCATIONS: 1198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1199 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1200 case MAT_KEEP_NONZERO_PATTERN: 1201 case MAT_NEW_NONZERO_LOCATION_ERR: 1202 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1203 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1204 break; 1205 case MAT_ROW_ORIENTED: 1206 a->roworiented = flg; 1207 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1208 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1209 break; 1210 case MAT_NEW_DIAGONALS: 1211 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1212 break; 1213 case MAT_IGNORE_OFF_PROC_ENTRIES: 1214 a->donotstash = flg; 1215 break; 1216 case MAT_USE_HASH_TABLE: 1217 a->ht_flag = flg; 1218 break; 1219 case MAT_HERMITIAN: 1220 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1221 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1222 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1223 break; 1224 case MAT_SPD: 1225 A->spd_set = PETSC_TRUE; 1226 A->spd = flg; 1227 if (flg) { 1228 A->symmetric = PETSC_TRUE; 1229 A->structurally_symmetric = PETSC_TRUE; 1230 A->symmetric_set = PETSC_TRUE; 1231 A->structurally_symmetric_set = PETSC_TRUE; 1232 } 1233 break; 1234 case MAT_SYMMETRIC: 1235 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1236 break; 1237 case MAT_STRUCTURALLY_SYMMETRIC: 1238 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1239 break; 1240 case MAT_SYMMETRY_ETERNAL: 1241 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1242 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1243 break; 1244 case MAT_IGNORE_LOWER_TRIANGULAR: 1245 aA->ignore_ltriangular = flg; 1246 break; 1247 case MAT_ERROR_LOWER_TRIANGULAR: 1248 aA->ignore_ltriangular = flg; 1249 break; 1250 case MAT_GETROW_UPPERTRIANGULAR: 1251 aA->getrow_utriangular = flg; 1252 break; 1253 default: 1254 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1261 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1262 { 1263 PetscErrorCode ierr; 1264 PetscFunctionBegin; 1265 if (MAT_INITIAL_MATRIX || *B != A) { 1266 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1267 } 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1273 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1274 { 1275 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1276 Mat a=baij->A, b=baij->B; 1277 PetscErrorCode ierr; 1278 PetscInt nv,m,n; 1279 PetscTruth flg; 1280 1281 PetscFunctionBegin; 1282 if (ll != rr){ 1283 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1284 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1285 } 1286 if (!ll) PetscFunctionReturn(0); 1287 1288 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1289 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1290 1291 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1292 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1293 1294 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1295 1296 /* left diagonalscale the off-diagonal part */ 1297 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1298 1299 /* scale the diagonal part */ 1300 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1301 1302 /* right diagonalscale the off-diagonal part */ 1303 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1310 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1311 { 1312 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatEqual_MPISBAIJ" 1324 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1325 { 1326 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1327 Mat a,b,c,d; 1328 PetscTruth flg; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 a = matA->A; b = matA->B; 1333 c = matB->A; d = matB->B; 1334 1335 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1336 if (flg) { 1337 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1338 } 1339 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatCopy_MPISBAIJ" 1345 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1346 { 1347 PetscErrorCode ierr; 1348 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1349 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1350 1351 PetscFunctionBegin; 1352 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1353 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1354 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1355 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1356 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1357 } else { 1358 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1359 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1360 } 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1366 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1367 { 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1377 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1378 { 1379 PetscErrorCode ierr; 1380 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1381 PetscBLASInt bnz,one=1; 1382 Mat_SeqSBAIJ *xa,*ya; 1383 Mat_SeqBAIJ *xb,*yb; 1384 1385 PetscFunctionBegin; 1386 if (str == SAME_NONZERO_PATTERN) { 1387 PetscScalar alpha = a; 1388 xa = (Mat_SeqSBAIJ *)xx->A->data; 1389 ya = (Mat_SeqSBAIJ *)yy->A->data; 1390 bnz = PetscBLASIntCast(xa->nz); 1391 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1392 xb = (Mat_SeqBAIJ *)xx->B->data; 1393 yb = (Mat_SeqBAIJ *)yy->B->data; 1394 bnz = PetscBLASIntCast(xb->nz); 1395 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1396 } else { 1397 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1398 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1399 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1406 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1407 { 1408 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1409 PetscInt rbs,cbs; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1414 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1416 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1417 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1418 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1424 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1425 { 1426 PetscErrorCode ierr; 1427 PetscInt i; 1428 PetscTruth flg; 1429 1430 PetscFunctionBegin; 1431 for (i=0; i<n; i++) { 1432 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1433 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1434 } 1435 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 1440 /* -------------------------------------------------------------------*/ 1441 static struct _MatOps MatOps_Values = { 1442 MatSetValues_MPISBAIJ, 1443 MatGetRow_MPISBAIJ, 1444 MatRestoreRow_MPISBAIJ, 1445 MatMult_MPISBAIJ, 1446 /* 4*/ MatMultAdd_MPISBAIJ, 1447 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1448 MatMultAdd_MPISBAIJ, 1449 0, 1450 0, 1451 0, 1452 /*10*/ 0, 1453 0, 1454 0, 1455 MatSOR_MPISBAIJ, 1456 MatTranspose_MPISBAIJ, 1457 /*15*/ MatGetInfo_MPISBAIJ, 1458 MatEqual_MPISBAIJ, 1459 MatGetDiagonal_MPISBAIJ, 1460 MatDiagonalScale_MPISBAIJ, 1461 MatNorm_MPISBAIJ, 1462 /*20*/ MatAssemblyBegin_MPISBAIJ, 1463 MatAssemblyEnd_MPISBAIJ, 1464 MatSetOption_MPISBAIJ, 1465 MatZeroEntries_MPISBAIJ, 1466 /*24*/ 0, 1467 0, 1468 0, 1469 0, 1470 0, 1471 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1472 0, 1473 0, 1474 0, 1475 0, 1476 /*34*/ MatDuplicate_MPISBAIJ, 1477 0, 1478 0, 1479 0, 1480 0, 1481 /*39*/ MatAXPY_MPISBAIJ, 1482 MatGetSubMatrices_MPISBAIJ, 1483 MatIncreaseOverlap_MPISBAIJ, 1484 MatGetValues_MPISBAIJ, 1485 MatCopy_MPISBAIJ, 1486 /*44*/ 0, 1487 MatScale_MPISBAIJ, 1488 0, 1489 0, 1490 0, 1491 /*49*/ MatSetBlockSize_MPISBAIJ, 1492 0, 1493 0, 1494 0, 1495 0, 1496 /*54*/ 0, 1497 0, 1498 MatSetUnfactored_MPISBAIJ, 1499 0, 1500 MatSetValuesBlocked_MPISBAIJ, 1501 /*59*/ 0, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*64*/ 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*74*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*79*/ 0, 1522 0, 1523 0, 1524 0, 1525 MatLoad_MPISBAIJ, 1526 /*84*/ 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 /*89*/ 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 /*94*/ 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 /*99*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*104*/0, 1547 MatRealPart_MPISBAIJ, 1548 MatImaginaryPart_MPISBAIJ, 1549 MatGetRowUpperTriangular_MPISBAIJ, 1550 MatRestoreRowUpperTriangular_MPISBAIJ, 1551 /*109*/0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /*114*/0, 1557 0, 1558 0, 1559 0, 1560 0, 1561 /*119*/0, 1562 0, 1563 0, 1564 0 1565 }; 1566 1567 1568 EXTERN_C_BEGIN 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1571 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1572 { 1573 PetscFunctionBegin; 1574 *a = ((Mat_MPISBAIJ *)A->data)->A; 1575 *iscopy = PETSC_FALSE; 1576 PetscFunctionReturn(0); 1577 } 1578 EXTERN_C_END 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1583 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1584 { 1585 Mat_MPISBAIJ *b; 1586 PetscErrorCode ierr; 1587 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1588 1589 PetscFunctionBegin; 1590 if (bs < 0){ 1591 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1592 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1593 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1594 bs = PetscAbs(bs); 1595 } 1596 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1597 bs = newbs; 1598 1599 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1600 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1601 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1602 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1603 1604 B->rmap->bs = B->cmap->bs = bs; 1605 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1606 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1607 1608 if (d_nnz) { 1609 for (i=0; i<B->rmap->n/bs; i++) { 1610 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1611 } 1612 } 1613 if (o_nnz) { 1614 for (i=0; i<B->rmap->n/bs; i++) { 1615 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1616 } 1617 } 1618 1619 b = (Mat_MPISBAIJ*)B->data; 1620 mbs = B->rmap->n/bs; 1621 Mbs = B->rmap->N/bs; 1622 if (mbs*bs != B->rmap->n) { 1623 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1624 } 1625 1626 B->rmap->bs = bs; 1627 b->bs2 = bs*bs; 1628 b->mbs = mbs; 1629 b->nbs = mbs; 1630 b->Mbs = Mbs; 1631 b->Nbs = Mbs; 1632 1633 for (i=0; i<=b->size; i++) { 1634 b->rangebs[i] = B->rmap->range[i]/bs; 1635 } 1636 b->rstartbs = B->rmap->rstart/bs; 1637 b->rendbs = B->rmap->rend/bs; 1638 1639 b->cstartbs = B->cmap->rstart/bs; 1640 b->cendbs = B->cmap->rend/bs; 1641 1642 if (!B->preallocated) { 1643 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1644 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1645 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1646 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1647 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1648 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1649 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1650 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1651 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1652 } 1653 1654 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1655 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1656 B->preallocated = PETSC_TRUE; 1657 PetscFunctionReturn(0); 1658 } 1659 EXTERN_C_END 1660 1661 EXTERN_C_BEGIN 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1664 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1665 { 1666 PetscInt m,rstart,cstart,cend; 1667 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1668 const PetscInt *JJ=0; 1669 PetscScalar *values=0; 1670 PetscErrorCode ierr; 1671 1672 PetscFunctionBegin; 1673 1674 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1675 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1676 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1677 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1678 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1679 m = B->rmap->n/bs; 1680 rstart = B->rmap->rstart/bs; 1681 cstart = B->cmap->rstart/bs; 1682 cend = B->cmap->rend/bs; 1683 1684 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1685 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1686 for (i=0; i<m; i++) { 1687 nz = ii[i+1] - ii[i]; 1688 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1689 nz_max = PetscMax(nz_max,nz); 1690 JJ = jj + ii[i]; 1691 for (j=0; j<nz; j++) { 1692 if (*JJ >= cstart) break; 1693 JJ++; 1694 } 1695 d = 0; 1696 for (; j<nz; j++) { 1697 if (*JJ++ >= cend) break; 1698 d++; 1699 } 1700 d_nnz[i] = d; 1701 o_nnz[i] = nz - d; 1702 } 1703 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1704 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1705 1706 values = (PetscScalar*)V; 1707 if (!values) { 1708 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1709 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1710 } 1711 for (i=0; i<m; i++) { 1712 PetscInt row = i + rstart; 1713 PetscInt ncols = ii[i+1] - ii[i]; 1714 const PetscInt *icols = jj + ii[i]; 1715 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1716 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1717 } 1718 1719 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1720 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 1723 PetscFunctionReturn(0); 1724 } 1725 EXTERN_C_END 1726 1727 EXTERN_C_BEGIN 1728 #if defined(PETSC_HAVE_MUMPS) 1729 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1730 #endif 1731 #if defined(PETSC_HAVE_SPOOLES) 1732 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1733 #endif 1734 #if defined(PETSC_HAVE_PASTIX) 1735 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1736 #endif 1737 EXTERN_C_END 1738 1739 /*MC 1740 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1741 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1742 1743 Options Database Keys: 1744 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1745 1746 Level: beginner 1747 1748 .seealso: MatCreateMPISBAIJ 1749 M*/ 1750 1751 EXTERN_C_BEGIN 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatCreate_MPISBAIJ" 1754 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1755 { 1756 Mat_MPISBAIJ *b; 1757 PetscErrorCode ierr; 1758 PetscTruth flg; 1759 1760 PetscFunctionBegin; 1761 1762 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1763 B->data = (void*)b; 1764 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1765 1766 B->ops->destroy = MatDestroy_MPISBAIJ; 1767 B->ops->view = MatView_MPISBAIJ; 1768 B->mapping = 0; 1769 B->assembled = PETSC_FALSE; 1770 1771 B->insertmode = NOT_SET_VALUES; 1772 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1773 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1774 1775 /* build local table of row and column ownerships */ 1776 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1777 1778 /* build cache for off array entries formed */ 1779 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1780 b->donotstash = PETSC_FALSE; 1781 b->colmap = PETSC_NULL; 1782 b->garray = PETSC_NULL; 1783 b->roworiented = PETSC_TRUE; 1784 1785 /* stuff used in block assembly */ 1786 b->barray = 0; 1787 1788 /* stuff used for matrix vector multiply */ 1789 b->lvec = 0; 1790 b->Mvctx = 0; 1791 b->slvec0 = 0; 1792 b->slvec0b = 0; 1793 b->slvec1 = 0; 1794 b->slvec1a = 0; 1795 b->slvec1b = 0; 1796 b->sMvctx = 0; 1797 1798 /* stuff for MatGetRow() */ 1799 b->rowindices = 0; 1800 b->rowvalues = 0; 1801 b->getrowactive = PETSC_FALSE; 1802 1803 /* hash table stuff */ 1804 b->ht = 0; 1805 b->hd = 0; 1806 b->ht_size = 0; 1807 b->ht_flag = PETSC_FALSE; 1808 b->ht_fact = 0; 1809 b->ht_total_ct = 0; 1810 b->ht_insert_ct = 0; 1811 1812 b->in_loc = 0; 1813 b->v_loc = 0; 1814 b->n_loc = 0; 1815 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1816 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1817 if (flg) { 1818 PetscReal fact = 1.39; 1819 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1820 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1821 if (fact <= 1.0) fact = 1.39; 1822 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1823 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1824 } 1825 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1826 1827 #if defined(PETSC_HAVE_PASTIX) 1828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1829 "MatGetFactor_mpisbaij_pastix", 1830 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1831 #endif 1832 #if defined(PETSC_HAVE_MUMPS) 1833 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1834 "MatGetFactor_sbaij_mumps", 1835 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1836 #endif 1837 #if defined(PETSC_HAVE_SPOOLES) 1838 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1839 "MatGetFactor_mpisbaij_spooles", 1840 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1841 #endif 1842 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1843 "MatStoreValues_MPISBAIJ", 1844 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1845 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1846 "MatRetrieveValues_MPISBAIJ", 1847 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1848 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1849 "MatGetDiagonalBlock_MPISBAIJ", 1850 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1851 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1852 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1853 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1854 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1855 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1856 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1857 B->symmetric = PETSC_TRUE; 1858 B->structurally_symmetric = PETSC_TRUE; 1859 B->symmetric_set = PETSC_TRUE; 1860 B->structurally_symmetric_set = PETSC_TRUE; 1861 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 EXTERN_C_END 1865 1866 /*MC 1867 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1868 1869 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1870 and MATMPISBAIJ otherwise. 1871 1872 Options Database Keys: 1873 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1874 1875 Level: beginner 1876 1877 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1878 M*/ 1879 1880 EXTERN_C_BEGIN 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatCreate_SBAIJ" 1883 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1884 { 1885 PetscErrorCode ierr; 1886 PetscMPIInt size; 1887 1888 PetscFunctionBegin; 1889 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1890 if (size == 1) { 1891 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1892 } else { 1893 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 EXTERN_C_END 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1901 /*@C 1902 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1903 the user should preallocate the matrix storage by setting the parameters 1904 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1905 performance can be increased by more than a factor of 50. 1906 1907 Collective on Mat 1908 1909 Input Parameters: 1910 + A - the matrix 1911 . bs - size of blockk 1912 . d_nz - number of block nonzeros per block row in diagonal portion of local 1913 submatrix (same for all local rows) 1914 . d_nnz - array containing the number of block nonzeros in the various block rows 1915 in the upper triangular and diagonal part of the in diagonal portion of the local 1916 (possibly different for each block row) or PETSC_NULL. You must leave room 1917 for the diagonal entry even if it is zero. 1918 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1919 submatrix (same for all local rows). 1920 - o_nnz - array containing the number of nonzeros in the various block rows of the 1921 off-diagonal portion of the local submatrix (possibly different for 1922 each block row) or PETSC_NULL. 1923 1924 1925 Options Database Keys: 1926 . -mat_no_unroll - uses code that does not unroll the loops in the 1927 block calculations (much slower) 1928 . -mat_block_size - size of the blocks to use 1929 1930 Notes: 1931 1932 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1933 than it must be used on all processors that share the object for that argument. 1934 1935 If the *_nnz parameter is given then the *_nz parameter is ignored 1936 1937 Storage Information: 1938 For a square global matrix we define each processor's diagonal portion 1939 to be its local rows and the corresponding columns (a square submatrix); 1940 each processor's off-diagonal portion encompasses the remainder of the 1941 local matrix (a rectangular submatrix). 1942 1943 The user can specify preallocated storage for the diagonal part of 1944 the local submatrix with either d_nz or d_nnz (not both). Set 1945 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1946 memory allocation. Likewise, specify preallocated storage for the 1947 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1948 1949 You can call MatGetInfo() to get information on how effective the preallocation was; 1950 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1951 You can also run with the option -info and look for messages with the string 1952 malloc in them to see if additional memory allocation was needed. 1953 1954 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1955 the figure below we depict these three local rows and all columns (0-11). 1956 1957 .vb 1958 0 1 2 3 4 5 6 7 8 9 10 11 1959 ------------------- 1960 row 3 | o o o d d d o o o o o o 1961 row 4 | o o o d d d o o o o o o 1962 row 5 | o o o d d d o o o o o o 1963 ------------------- 1964 .ve 1965 1966 Thus, any entries in the d locations are stored in the d (diagonal) 1967 submatrix, and any entries in the o locations are stored in the 1968 o (off-diagonal) submatrix. Note that the d matrix is stored in 1969 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1970 1971 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1972 plus the diagonal part of the d matrix, 1973 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1974 In general, for PDE problems in which most nonzeros are near the diagonal, 1975 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1976 or you will get TERRIBLE performance; see the users' manual chapter on 1977 matrices. 1978 1979 Level: intermediate 1980 1981 .keywords: matrix, block, aij, compressed row, sparse, parallel 1982 1983 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1984 @*/ 1985 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1986 { 1987 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1988 1989 PetscFunctionBegin; 1990 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1991 if (f) { 1992 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatCreateMPISBAIJ" 1999 /*@C 2000 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2001 (block compressed row). For good matrix assembly performance 2002 the user should preallocate the matrix storage by setting the parameters 2003 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2004 performance can be increased by more than a factor of 50. 2005 2006 Collective on MPI_Comm 2007 2008 Input Parameters: 2009 + comm - MPI communicator 2010 . bs - size of blockk 2011 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2012 This value should be the same as the local size used in creating the 2013 y vector for the matrix-vector product y = Ax. 2014 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2015 This value should be the same as the local size used in creating the 2016 x vector for the matrix-vector product y = Ax. 2017 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2018 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2019 . d_nz - number of block nonzeros per block row in diagonal portion of local 2020 submatrix (same for all local rows) 2021 . d_nnz - array containing the number of block nonzeros in the various block rows 2022 in the upper triangular portion of the in diagonal portion of the local 2023 (possibly different for each block block row) or PETSC_NULL. 2024 You must leave room for the diagonal entry even if it is zero. 2025 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2026 submatrix (same for all local rows). 2027 - o_nnz - array containing the number of nonzeros in the various block rows of the 2028 off-diagonal portion of the local submatrix (possibly different for 2029 each block row) or PETSC_NULL. 2030 2031 Output Parameter: 2032 . A - the matrix 2033 2034 Options Database Keys: 2035 . -mat_no_unroll - uses code that does not unroll the loops in the 2036 block calculations (much slower) 2037 . -mat_block_size - size of the blocks to use 2038 . -mat_mpi - use the parallel matrix data structures even on one processor 2039 (defaults to using SeqBAIJ format on one processor) 2040 2041 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2042 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2043 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2044 2045 Notes: 2046 The number of rows and columns must be divisible by blocksize. 2047 This matrix type does not support complex Hermitian operation. 2048 2049 The user MUST specify either the local or global matrix dimensions 2050 (possibly both). 2051 2052 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2053 than it must be used on all processors that share the object for that argument. 2054 2055 If the *_nnz parameter is given then the *_nz parameter is ignored 2056 2057 Storage Information: 2058 For a square global matrix we define each processor's diagonal portion 2059 to be its local rows and the corresponding columns (a square submatrix); 2060 each processor's off-diagonal portion encompasses the remainder of the 2061 local matrix (a rectangular submatrix). 2062 2063 The user can specify preallocated storage for the diagonal part of 2064 the local submatrix with either d_nz or d_nnz (not both). Set 2065 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2066 memory allocation. Likewise, specify preallocated storage for the 2067 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2068 2069 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2070 the figure below we depict these three local rows and all columns (0-11). 2071 2072 .vb 2073 0 1 2 3 4 5 6 7 8 9 10 11 2074 ------------------- 2075 row 3 | o o o d d d o o o o o o 2076 row 4 | o o o d d d o o o o o o 2077 row 5 | o o o d d d o o o o o o 2078 ------------------- 2079 .ve 2080 2081 Thus, any entries in the d locations are stored in the d (diagonal) 2082 submatrix, and any entries in the o locations are stored in the 2083 o (off-diagonal) submatrix. Note that the d matrix is stored in 2084 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2085 2086 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2087 plus the diagonal part of the d matrix, 2088 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2089 In general, for PDE problems in which most nonzeros are near the diagonal, 2090 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2091 or you will get TERRIBLE performance; see the users' manual chapter on 2092 matrices. 2093 2094 Level: intermediate 2095 2096 .keywords: matrix, block, aij, compressed row, sparse, parallel 2097 2098 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2099 @*/ 2100 2101 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2102 { 2103 PetscErrorCode ierr; 2104 PetscMPIInt size; 2105 2106 PetscFunctionBegin; 2107 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2108 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2109 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2110 if (size > 1) { 2111 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2112 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2113 } else { 2114 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2115 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2116 } 2117 PetscFunctionReturn(0); 2118 } 2119 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2123 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2124 { 2125 Mat mat; 2126 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2127 PetscErrorCode ierr; 2128 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2129 PetscScalar *array; 2130 2131 PetscFunctionBegin; 2132 *newmat = 0; 2133 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2134 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2135 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2136 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2137 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2138 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2139 2140 mat->factortype = matin->factortype; 2141 mat->preallocated = PETSC_TRUE; 2142 mat->assembled = PETSC_TRUE; 2143 mat->insertmode = NOT_SET_VALUES; 2144 2145 a = (Mat_MPISBAIJ*)mat->data; 2146 a->bs2 = oldmat->bs2; 2147 a->mbs = oldmat->mbs; 2148 a->nbs = oldmat->nbs; 2149 a->Mbs = oldmat->Mbs; 2150 a->Nbs = oldmat->Nbs; 2151 2152 2153 a->size = oldmat->size; 2154 a->rank = oldmat->rank; 2155 a->donotstash = oldmat->donotstash; 2156 a->roworiented = oldmat->roworiented; 2157 a->rowindices = 0; 2158 a->rowvalues = 0; 2159 a->getrowactive = PETSC_FALSE; 2160 a->barray = 0; 2161 a->rstartbs = oldmat->rstartbs; 2162 a->rendbs = oldmat->rendbs; 2163 a->cstartbs = oldmat->cstartbs; 2164 a->cendbs = oldmat->cendbs; 2165 2166 /* hash table stuff */ 2167 a->ht = 0; 2168 a->hd = 0; 2169 a->ht_size = 0; 2170 a->ht_flag = oldmat->ht_flag; 2171 a->ht_fact = oldmat->ht_fact; 2172 a->ht_total_ct = 0; 2173 a->ht_insert_ct = 0; 2174 2175 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2176 if (oldmat->colmap) { 2177 #if defined (PETSC_USE_CTABLE) 2178 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2179 #else 2180 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2181 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2182 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2183 #endif 2184 } else a->colmap = 0; 2185 2186 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2187 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2188 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2189 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2190 } else a->garray = 0; 2191 2192 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2193 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2194 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2195 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2196 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2197 2198 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2199 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2200 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2201 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2202 2203 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2204 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2205 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2206 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2207 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2208 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2209 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2210 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2211 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2212 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2213 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2214 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2215 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2216 2217 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2218 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2219 a->sMvctx = oldmat->sMvctx; 2220 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2221 2222 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2223 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2224 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2225 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2226 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2227 *newmat = mat; 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatLoad_MPISBAIJ" 2233 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2234 { 2235 PetscErrorCode ierr; 2236 PetscInt i,nz,j,rstart,rend; 2237 PetscScalar *vals,*buf; 2238 MPI_Comm comm = ((PetscObject)viewer)->comm; 2239 MPI_Status status; 2240 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2241 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2242 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2243 PetscInt bs=1,Mbs,mbs,extra_rows; 2244 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2245 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2246 int fd; 2247 2248 PetscFunctionBegin; 2249 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2250 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2251 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2252 2253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2254 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2255 if (!rank) { 2256 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2257 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2258 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2259 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2260 } 2261 2262 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2263 2264 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2265 M = header[1]; N = header[2]; 2266 2267 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2268 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2269 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2270 2271 /* If global sizes are set, check if they are consistent with that given in the file */ 2272 if (sizesset) { 2273 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2274 } 2275 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2276 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2277 2278 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2279 2280 /* 2281 This code adds extra rows to make sure the number of rows is 2282 divisible by the blocksize 2283 */ 2284 Mbs = M/bs; 2285 extra_rows = bs - M + bs*(Mbs); 2286 if (extra_rows == bs) extra_rows = 0; 2287 else Mbs++; 2288 if (extra_rows &&!rank) { 2289 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2290 } 2291 2292 /* determine ownership of all rows */ 2293 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2294 mbs = Mbs/size + ((Mbs % size) > rank); 2295 m = mbs*bs; 2296 } else { /* User Set */ 2297 m = newmat->rmap->n; 2298 mbs = m/bs; 2299 } 2300 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2301 mmbs = PetscMPIIntCast(mbs); 2302 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2303 rowners[0] = 0; 2304 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2305 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2306 rstart = rowners[rank]; 2307 rend = rowners[rank+1]; 2308 2309 /* distribute row lengths to all processors */ 2310 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2311 if (!rank) { 2312 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2313 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2314 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2315 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2316 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2317 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2318 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2319 } else { 2320 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2321 } 2322 2323 if (!rank) { /* procs[0] */ 2324 /* calculate the number of nonzeros on each processor */ 2325 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2326 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2327 for (i=0; i<size; i++) { 2328 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2329 procsnz[i] += rowlengths[j]; 2330 } 2331 } 2332 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2333 2334 /* determine max buffer needed and allocate it */ 2335 maxnz = 0; 2336 for (i=0; i<size; i++) { 2337 maxnz = PetscMax(maxnz,procsnz[i]); 2338 } 2339 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2340 2341 /* read in my part of the matrix column indices */ 2342 nz = procsnz[0]; 2343 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2344 mycols = ibuf; 2345 if (size == 1) nz -= extra_rows; 2346 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2347 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2348 2349 /* read in every ones (except the last) and ship off */ 2350 for (i=1; i<size-1; i++) { 2351 nz = procsnz[i]; 2352 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2353 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2354 } 2355 /* read in the stuff for the last proc */ 2356 if (size != 1) { 2357 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2358 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2359 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2360 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2361 } 2362 ierr = PetscFree(cols);CHKERRQ(ierr); 2363 } else { /* procs[i], i>0 */ 2364 /* determine buffer space needed for message */ 2365 nz = 0; 2366 for (i=0; i<m; i++) { 2367 nz += locrowlens[i]; 2368 } 2369 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2370 mycols = ibuf; 2371 /* receive message of column indices*/ 2372 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2373 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2374 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2375 } 2376 2377 /* loop over local rows, determining number of off diagonal entries */ 2378 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2379 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2380 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2381 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2382 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2383 rowcount = 0; 2384 nzcount = 0; 2385 for (i=0; i<mbs; i++) { 2386 dcount = 0; 2387 odcount = 0; 2388 for (j=0; j<bs; j++) { 2389 kmax = locrowlens[rowcount]; 2390 for (k=0; k<kmax; k++) { 2391 tmp = mycols[nzcount++]/bs; /* block col. index */ 2392 if (!mask[tmp]) { 2393 mask[tmp] = 1; 2394 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2395 else masked1[dcount++] = tmp; /* entry in diag portion */ 2396 } 2397 } 2398 rowcount++; 2399 } 2400 2401 dlens[i] = dcount; /* d_nzz[i] */ 2402 odlens[i] = odcount; /* o_nzz[i] */ 2403 2404 /* zero out the mask elements we set */ 2405 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2406 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2407 } 2408 if (!sizesset) { 2409 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2410 } 2411 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2412 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2413 2414 if (!rank) { 2415 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2416 /* read in my part of the matrix numerical values */ 2417 nz = procsnz[0]; 2418 vals = buf; 2419 mycols = ibuf; 2420 if (size == 1) nz -= extra_rows; 2421 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2422 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2423 2424 /* insert into matrix */ 2425 jj = rstart*bs; 2426 for (i=0; i<m; i++) { 2427 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2428 mycols += locrowlens[i]; 2429 vals += locrowlens[i]; 2430 jj++; 2431 } 2432 2433 /* read in other processors (except the last one) and ship out */ 2434 for (i=1; i<size-1; i++) { 2435 nz = procsnz[i]; 2436 vals = buf; 2437 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2438 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2439 } 2440 /* the last proc */ 2441 if (size != 1){ 2442 nz = procsnz[i] - extra_rows; 2443 vals = buf; 2444 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2445 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2446 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2449 2450 } else { 2451 /* receive numeric values */ 2452 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2453 2454 /* receive message of values*/ 2455 vals = buf; 2456 mycols = ibuf; 2457 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2458 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2459 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2460 2461 /* insert into matrix */ 2462 jj = rstart*bs; 2463 for (i=0; i<m; i++) { 2464 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2465 mycols += locrowlens[i]; 2466 vals += locrowlens[i]; 2467 jj++; 2468 } 2469 } 2470 2471 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2472 ierr = PetscFree(buf);CHKERRQ(ierr); 2473 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2474 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2475 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2476 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2477 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2478 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2484 /*XXXXX@ 2485 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2486 2487 Input Parameters: 2488 . mat - the matrix 2489 . fact - factor 2490 2491 Not Collective on Mat, each process can have a different hash factor 2492 2493 Level: advanced 2494 2495 Notes: 2496 This can also be set by the command line option: -mat_use_hash_table fact 2497 2498 .keywords: matrix, hashtable, factor, HT 2499 2500 .seealso: MatSetOption() 2501 @XXXXX*/ 2502 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2506 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2507 { 2508 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2509 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2510 PetscReal atmp; 2511 PetscReal *work,*svalues,*rvalues; 2512 PetscErrorCode ierr; 2513 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2514 PetscMPIInt rank,size; 2515 PetscInt *rowners_bs,dest,count,source; 2516 PetscScalar *va; 2517 MatScalar *ba; 2518 MPI_Status stat; 2519 2520 PetscFunctionBegin; 2521 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2522 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2523 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2524 2525 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2526 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2527 2528 bs = A->rmap->bs; 2529 mbs = a->mbs; 2530 Mbs = a->Mbs; 2531 ba = b->a; 2532 bi = b->i; 2533 bj = b->j; 2534 2535 /* find ownerships */ 2536 rowners_bs = A->rmap->range; 2537 2538 /* each proc creates an array to be distributed */ 2539 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2540 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2541 2542 /* row_max for B */ 2543 if (rank != size-1){ 2544 for (i=0; i<mbs; i++) { 2545 ncols = bi[1] - bi[0]; bi++; 2546 brow = bs*i; 2547 for (j=0; j<ncols; j++){ 2548 bcol = bs*(*bj); 2549 for (kcol=0; kcol<bs; kcol++){ 2550 col = bcol + kcol; /* local col index */ 2551 col += rowners_bs[rank+1]; /* global col index */ 2552 for (krow=0; krow<bs; krow++){ 2553 atmp = PetscAbsScalar(*ba); ba++; 2554 row = brow + krow; /* local row index */ 2555 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2556 if (work[col] < atmp) work[col] = atmp; 2557 } 2558 } 2559 bj++; 2560 } 2561 } 2562 2563 /* send values to its owners */ 2564 for (dest=rank+1; dest<size; dest++){ 2565 svalues = work + rowners_bs[dest]; 2566 count = rowners_bs[dest+1]-rowners_bs[dest]; 2567 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2568 } 2569 } 2570 2571 /* receive values */ 2572 if (rank){ 2573 rvalues = work; 2574 count = rowners_bs[rank+1]-rowners_bs[rank]; 2575 for (source=0; source<rank; source++){ 2576 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2577 /* process values */ 2578 for (i=0; i<count; i++){ 2579 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2580 } 2581 } 2582 } 2583 2584 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2585 ierr = PetscFree(work);CHKERRQ(ierr); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 #undef __FUNCT__ 2590 #define __FUNCT__ "MatSOR_MPISBAIJ" 2591 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2592 { 2593 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2594 PetscErrorCode ierr; 2595 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2596 PetscScalar *x,*ptr,*from; 2597 Vec bb1; 2598 const PetscScalar *b; 2599 2600 PetscFunctionBegin; 2601 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2602 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2603 2604 if (flag == SOR_APPLY_UPPER) { 2605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2606 PetscFunctionReturn(0); 2607 } 2608 2609 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2610 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2612 its--; 2613 } 2614 2615 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2616 while (its--){ 2617 2618 /* lower triangular part: slvec0b = - B^T*xx */ 2619 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2620 2621 /* copy xx into slvec0a */ 2622 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2623 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2624 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2625 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2626 2627 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2628 2629 /* copy bb into slvec1a */ 2630 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2631 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2632 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2633 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2634 2635 /* set slvec1b = 0 */ 2636 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2637 2638 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2639 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2640 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2641 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2642 2643 /* upper triangular part: bb1 = bb1 - B*x */ 2644 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2645 2646 /* local diagonal sweep */ 2647 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2648 } 2649 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2650 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2651 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2652 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2653 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2654 } else if (flag & SOR_EISENSTAT) { 2655 Vec xx1; 2656 PetscTruth hasop; 2657 const PetscScalar *diag; 2658 PetscScalar *sl,scale = (omega - 2.0)/omega; 2659 PetscInt i,n; 2660 2661 if (!mat->xx1) { 2662 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2663 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2664 } 2665 xx1 = mat->xx1; 2666 bb1 = mat->bb1; 2667 2668 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2669 2670 if (!mat->diag) { 2671 /* this is wrong for same matrix with new nonzero values */ 2672 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2673 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2674 } 2675 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2676 2677 if (hasop) { 2678 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2679 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2680 } else { 2681 /* 2682 These two lines are replaced by code that may be a bit faster for a good compiler 2683 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2684 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2685 */ 2686 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2687 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2688 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2689 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2690 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2691 if (omega == 1.0) { 2692 for (i=0; i<n; i++) { 2693 sl[i] = b[i] - diag[i]*x[i]; 2694 } 2695 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2696 } else { 2697 for (i=0; i<n; i++) { 2698 sl[i] = b[i] + scale*diag[i]*x[i]; 2699 } 2700 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2701 } 2702 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2703 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2704 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2705 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2706 } 2707 2708 /* multiply off-diagonal portion of matrix */ 2709 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2710 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2711 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2712 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2713 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2714 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2715 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2716 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2719 2720 /* local sweep */ 2721 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2722 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2723 } else { 2724 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2725 } 2726 PetscFunctionReturn(0); 2727 } 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2731 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2732 { 2733 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2734 PetscErrorCode ierr; 2735 Vec lvec1,bb1; 2736 2737 PetscFunctionBegin; 2738 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2739 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2740 2741 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2742 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2743 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2744 its--; 2745 } 2746 2747 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2748 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2749 while (its--){ 2750 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2751 2752 /* lower diagonal part: bb1 = bb - B^T*xx */ 2753 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2754 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2755 2756 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2757 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2758 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2759 2760 /* upper diagonal part: bb1 = bb1 - B*x */ 2761 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2762 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2763 2764 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2765 2766 /* diagonal sweep */ 2767 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2768 } 2769 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2770 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2771 } else { 2772 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2773 } 2774 PetscFunctionReturn(0); 2775 } 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2779 /*@ 2780 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2781 CSR format the local rows. 2782 2783 Collective on MPI_Comm 2784 2785 Input Parameters: 2786 + comm - MPI communicator 2787 . bs - the block size, only a block size of 1 is supported 2788 . m - number of local rows (Cannot be PETSC_DECIDE) 2789 . n - This value should be the same as the local size used in creating the 2790 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2791 calculated if N is given) For square matrices n is almost always m. 2792 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2793 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2794 . i - row indices 2795 . j - column indices 2796 - a - matrix values 2797 2798 Output Parameter: 2799 . mat - the matrix 2800 2801 Level: intermediate 2802 2803 Notes: 2804 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2805 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2806 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2807 2808 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2809 2810 .keywords: matrix, aij, compressed row, sparse, parallel 2811 2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2813 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2814 @*/ 2815 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2816 { 2817 PetscErrorCode ierr; 2818 2819 2820 PetscFunctionBegin; 2821 if (i[0]) { 2822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2823 } 2824 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2825 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2826 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2827 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2828 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2829 PetscFunctionReturn(0); 2830 } 2831 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2835 /*@C 2836 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2837 (the default parallel PETSc format). 2838 2839 Collective on MPI_Comm 2840 2841 Input Parameters: 2842 + A - the matrix 2843 . bs - the block size 2844 . i - the indices into j for the start of each local row (starts with zero) 2845 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2846 - v - optional values in the matrix 2847 2848 Level: developer 2849 2850 .keywords: matrix, aij, compressed row, sparse, parallel 2851 2852 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2853 @*/ 2854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2855 { 2856 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2857 2858 PetscFunctionBegin; 2859 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2860 if (f) { 2861 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2862 } 2863 PetscFunctionReturn(0); 2864 } 2865 2866 2867