1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscblaslapack.h" 7 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 9 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat); 11 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 12 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 14 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 17 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 19 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 20 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 21 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 22 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 EXTERN_C_BEGIN 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 27 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 28 { 29 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 34 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 35 PetscFunctionReturn(0); 36 } 37 EXTERN_C_END 38 39 EXTERN_C_BEGIN 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 42 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 43 { 44 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 49 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 EXTERN_C_END 53 54 55 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 56 { \ 57 \ 58 brow = row/bs; \ 59 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 60 rmax = aimax[brow]; nrow = ailen[brow]; \ 61 bcol = col/bs; \ 62 ridx = row % bs; cidx = col % bs; \ 63 low = 0; high = nrow; \ 64 while (high-low > 3) { \ 65 t = (low+high)/2; \ 66 if (rp[t] > bcol) high = t; \ 67 else low = t; \ 68 } \ 69 for (_i=low; _i<high; _i++) { \ 70 if (rp[_i] > bcol) break; \ 71 if (rp[_i] == bcol) { \ 72 bap = ap + bs2*_i + bs*cidx + ridx; \ 73 if (addv == ADD_VALUES) *bap += value; \ 74 else *bap = value; \ 75 goto a_noinsert; \ 76 } \ 77 } \ 78 if (a->nonew == 1) goto a_noinsert; \ 79 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 80 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 81 N = nrow++ - 1; \ 82 /* shift up all the later entries in this row */ \ 83 for (ii=N; ii>=_i; ii--) { \ 84 rp[ii+1] = rp[ii]; \ 85 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 86 } \ 87 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 88 rp[_i] = bcol; \ 89 ap[bs2*_i + bs*cidx + ridx] = value; \ 90 a_noinsert:; \ 91 ailen[brow] = nrow; \ 92 } 93 94 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 95 { \ 96 brow = row/bs; \ 97 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 98 rmax = bimax[brow]; nrow = bilen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto b_noinsert; \ 114 } \ 115 } \ 116 if (b->nonew == 1) goto b_noinsert; \ 117 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 118 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 119 N = nrow++ - 1; \ 120 /* shift up all the later entries in this row */ \ 121 for (ii=N; ii>=_i; ii--) { \ 122 rp[ii+1] = rp[ii]; \ 123 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 124 } \ 125 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 b_noinsert:; \ 129 bilen[brow] = nrow; \ 130 } 131 132 /* Only add/insert a(i,j) with i<=j (blocks). 133 Any a(i,j) with i>j input by user is ingored. 134 */ 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSetValues_MPISBAIJ" 137 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 138 { 139 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 140 MatScalar value; 141 PetscBool roworiented = baij->roworiented; 142 PetscErrorCode ierr; 143 PetscInt i,j,row,col; 144 PetscInt rstart_orig=mat->rmap->rstart; 145 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 146 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 147 148 /* Some Variables required in the macro */ 149 Mat A = baij->A; 150 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 151 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 152 MatScalar *aa=a->a; 153 154 Mat B = baij->B; 155 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 156 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 157 MatScalar *ba=b->a; 158 159 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 160 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 161 MatScalar *ap,*bap; 162 163 /* for stash */ 164 PetscInt n_loc, *in_loc = PETSC_NULL; 165 MatScalar *v_loc = PETSC_NULL; 166 167 PetscFunctionBegin; 168 if (v) PetscValidScalarPointer(v,6); 169 if (!baij->donotstash){ 170 if (n > baij->n_loc) { 171 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 174 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 175 baij->n_loc = n; 176 } 177 in_loc = baij->in_loc; 178 v_loc = baij->v_loc; 179 } 180 181 for (i=0; i<m; i++) { 182 if (im[i] < 0) continue; 183 #if defined(PETSC_USE_DEBUG) 184 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",im[i],mat->rmap->N-1); 185 #endif 186 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 187 row = im[i] - rstart_orig; /* local row index */ 188 for (j=0; j<n; j++) { 189 if (im[i]/bs > in[j]/bs){ 190 if (a->ignore_ltriangular){ 191 continue; /* ignore lower triangular blocks */ 192 } else { 193 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 194 } 195 } 196 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 197 col = in[j] - cstart_orig; /* local col index */ 198 brow = row/bs; bcol = col/bs; 199 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 200 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } 232 } 233 } else { /* off processor entry */ 234 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 235 if (!baij->donotstash) { 236 n_loc = 0; 237 for (j=0; j<n; j++){ 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray=baij->barray; 261 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if(!barray) 
{ 269 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (im[i] > in[j]) { 287 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 288 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 289 } 290 /* If NumCol = 1 then a copy is not required */ 291 if ((roworiented) && (n == 1)) { 292 barray = (MatScalar*) v + i*bs2; 293 } else if((!roworiented) && (m == 1)) { 294 barray = (MatScalar*) v + j*bs2; 295 } else { /* Here a copy is required */ 296 if (roworiented) { 297 value = v + i*(stepval+bs)*bs + j*bs; 298 } else { 299 value = v + j*(stepval+bs)*bs + i*bs; 300 } 301 for (ii=0; ii<bs; ii++,value+=stepval) { 302 for (jj=0; jj<bs; jj++) { 303 *barray++ = *value++; 304 } 305 } 306 barray -=bs2; 307 } 308 309 if (in[j] >= cstart && in[j] < cend){ 310 col = in[j] - cstart; 311 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 312 } 313 else if (in[j] < 0) continue; 314 #if defined(PETSC_USE_DEBUG) 315 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 316 #endif 317 else { 318 if (mat->was_assembled) { 319 if (!baij->colmap) { 320 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 321 } 322 323 #if defined(PETSC_USE_DEBUG) 324 #if defined (PETSC_USE_CTABLE) 325 { PetscInt data; 326 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 327 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 328 } 329 #else 330 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 331 #endif 332 #endif 333 #if defined (PETSC_USE_CTABLE) 334 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 335 col = (col - 1)/bs; 336 #else 337 col = (baij->colmap[in[j]] - 1)/bs; 338 #endif 339 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 340 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 341 col = in[j]; 342 } 343 } 344 else col = in[j]; 345 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 346 } 347 } 348 } else { 349 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 350 if (!baij->donotstash) { 351 if (roworiented) { 352 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } else { 354 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 355 } 356 } 357 } 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatGetValues_MPISBAIJ" 364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 365 { 366 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 367 PetscErrorCode ierr; 368 PetscInt 
bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;      /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;      /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
} 453 } 454 } 455 /* Bmat */ 456 v = bmat->a; jj = bmat->j; 457 for (brow=0; brow<mbs; brow++) { 458 grow = bs*(rstart + brow); 459 nz = bmat->i[brow+1] - bmat->i[brow]; 460 for (bcol=0; bcol<nz; bcol++){ 461 gcol = bs*garray[*jj]; jj++; 462 for (col=0; col<bs; col++){ 463 for (row=0; row<bs; row++){ 464 vabs = PetscAbsScalar(*v); v++; 465 rsum[gcol+col] += vabs; 466 rsum[grow+row] += vabs; 467 } 468 } 469 } 470 } 471 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 472 *norm = 0.0; 473 for (col=0; col<mat->cmap->N; col++) { 474 if (rsum2[col] > *norm) *norm = rsum2[col]; 475 } 476 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 477 } else { 478 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 479 } 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 487 { 488 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 489 PetscErrorCode ierr; 490 PetscInt nstash,reallocs; 491 InsertMode addv; 492 493 PetscFunctionBegin; 494 if (baij->donotstash || mat->nooffprocentries) { 495 PetscFunctionReturn(0); 496 } 497 498 /* make sure all processors are either in INSERTMODE or ADDMODE */ 499 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 500 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 501 mat->insertmode = addv; /* in case this processor had no cache */ 502 503 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 504 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 508 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 515 { 516 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 517 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 518 PetscErrorCode ierr; 519 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 520 PetscInt *row,*col; 521 PetscBool other_disassembled; 522 PetscMPIInt n; 523 PetscBool r1,r2,r3; 524 MatScalar *val; 525 InsertMode addv = mat->insertmode; 526 527 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 528 PetscFunctionBegin; 529 530 if (!baij->donotstash && !mat->nooffprocentries) { 531 while (1) { 532 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 533 if (!flg) break; 534 535 for (i=0; i<n;) { 536 /* Now identify the consecutive vals belonging to the same row */ 537 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 538 if (j < n) ncols = j-i; 539 else ncols = n-i; 540 /* Now assemble all these values with a single function call */ 541 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 542 i = j; 543 } 544 } 545 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 546 /* Now process the block-stash. 
       Since the values are stashed column-oriented, set the roworiented flag
       to PETSC_FALSE, and after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr =
MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 629 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 630 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 631 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscBool isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 671 672 /* copy over the A part */ 673 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 675 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 676 677 for (i=0; i<mbs; i++) { 678 rvals[0] = bs*(baij->rstartbs + i); 679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 680 for (j=ai[i]; j<ai[i+1]; j++) { 681 col = (baij->cstartbs+aj[j])*bs; 682 for (k=0; k<bs; k++) { 683 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 684 col++; a += bs; 685 } 686 } 687 } 688 /* copy over the B part */ 689 Bloc = (Mat_SeqBAIJ*)baij->B->data; 690 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 691 for (i=0; i<mbs; i++) { 692 693 rvals[0] = bs*(baij->rstartbs + i); 694 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 695 for (j=ai[i]; j<ai[i+1]; j++) { 696 col = baij->garray[aj[j]]*bs; 697 for (k=0; k<bs; k++) { 698 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 699 col++; a += bs; 700 } 701 } 702 } 703 ierr = PetscFree(rvals);CHKERRQ(ierr); 704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 /* 707 Everyone has to call to draw the matrix since the graphics waits are 708 synchronized across all processors that share the PetscDraw object 709 */ 710 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 713 /* Set the type name to MATMPISBAIJ so that 
the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 714 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 715 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 716 } 717 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 ierr = MatDestroy(A);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatView_MPISBAIJ" 725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 726 { 727 PetscErrorCode ierr; 728 PetscBool iascii,isdraw,issocket,isbinary; 729 730 PetscFunctionBegin; 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 735 if (iascii || isdraw || issocket || isbinary) { 736 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 737 } else { 738 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatDestroy_MPISBAIJ" 745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 #if defined(PETSC_USE_LOG) 752 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 753 #endif 754 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 755 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 756 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 757 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 758 #if defined (PETSC_USE_CTABLE) 759 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 760 #else 761 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 762 #endif 763 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 764 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 765 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 766 if (baij->slvec0) { 767 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 768 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 769 } 770 if (baij->slvec1) { 771 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 772 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 773 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 774 } 775 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 776 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 777 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 778 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 779 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 780 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 781 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 782 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 783 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 784 #endif 785 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 786 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 788 ierr = PetscFree(baij);CHKERRQ(ierr); 789 790 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

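  /* This 2comm variant assembles y = A*x from the upper-triangular storage in three
     pieces, as the calls below show: the diagonal block a->A, the off-diagonal block
     a->B applied forward (superdiagonal contribution), and a->B applied transposed
     with a reverse scatter-add (subdiagonal contribution). */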
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N
!= a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 970 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatScale_MPISBAIJ" 976 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 977 { 978 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 979 PetscErrorCode ierr; 980 981 PetscFunctionBegin; 982 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 983 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatGetRow_MPISBAIJ" 989 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 990 { 991 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 992 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 993 PetscErrorCode ierr; 994 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 995 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 996 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 997 998 PetscFunctionBegin; 999 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1000 mat->getrowactive = PETSC_TRUE; 1001 1002 if (!mat->rowvalues && (idx || v)) { 1003 /* 1004 allocate enough space to hold information from the longest row. 1005 */ 1006 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1007 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1008 PetscInt max = 1,mbs = mat->mbs,tmp; 1009 for (i=0; i<mbs; i++) { 1010 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1011 if (max < tmp) { max = tmp; } 1012 } 1013 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1014 } 1015 1016 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1017 lrow = row - brstart; /* local row index */ 1018 1019 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1020 if (!v) {pvA = 0; pvB = 0;} 1021 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1022 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1023 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1024 nztot = nzA + nzB; 1025 1026 cmap = mat->garray; 1027 if (v || idx) { 1028 if (nztot) { 1029 /* Sort by increasing column numbers, assuming A and B already sorted */ 1030 PetscInt imark = -1; 1031 if (v) { 1032 *v = v_p = mat->rowvalues; 1033 for (i=0; i<nzB; i++) { 1034 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1035 else break; 1036 } 1037 imark = i; 1038 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1039 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1040 } 1041 if (idx) { 1042 *idx = idx_p = mat->rowindices; 1043 if (imark > -1) { 1044 for (i=0; i<imark; i++) { 1045 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1046 } 1047 } else { 1048 for (i=0; i<nzB; i++) { 1049 if (cmap[cworkB[i]/bs] < cstart) 1050 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1051 else break; 1052 } 1053 imark = i; 1054 } 1055 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1056 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1057 } 1058 } else { 1059 if (idx) *idx = 0; 1060 if (v) *v = 0; 1061 } 1062 } 1063 *nz = nztot; 1064 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define 
__FUNCT__ "MatRestoreRow_MPISBAIJ" 1071 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1072 { 1073 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1074 1075 PetscFunctionBegin; 1076 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1077 baij->getrowactive = PETSC_FALSE; 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1083 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1084 { 1085 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1086 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1087 1088 PetscFunctionBegin; 1089 aA->getrow_utriangular = PETSC_TRUE; 1090 PetscFunctionReturn(0); 1091 } 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1094 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1095 { 1096 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1097 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1098 1099 PetscFunctionBegin; 1100 aA->getrow_utriangular = PETSC_FALSE; 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1106 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1107 { 1108 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1113 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1119 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1120 { 1121 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1126 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1132 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1133 { 1134 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1139 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1145 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1146 { 1147 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1148 Mat A = a->A,B = a->B; 1149 PetscErrorCode ierr; 1150 PetscReal isend[5],irecv[5]; 1151 1152 PetscFunctionBegin; 1153 info->block_size = (PetscReal)matin->rmap->bs; 1154 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1156 isend[3] = info->memory; isend[4] = info->mallocs; 1157 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1158 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1159 isend[3] += info->memory; isend[4] += info->mallocs; 1160 if (flag == MAT_LOCAL) { 1161 info->nz_used = isend[0]; 1162 info->nz_allocated = isend[1]; 1163 info->nz_unneeded = isend[2]; 1164 info->memory = isend[3]; 1165 info->mallocs = isend[4]; 1166 } else if (flag == MAT_GLOBAL_MAX) { 1167 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1168 info->nz_used = irecv[0]; 1169 info->nz_allocated = irecv[1]; 1170 info->nz_unneeded = irecv[2]; 1171 info->memory = irecv[3]; 1172 info->mallocs = irecv[4]; 1173 } 
else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
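  /* Only symmetric scaling D*A*D makes sense for the SBAIJ (symmetric) format, so ll
     and rr must be the same vector; this is verified below before scaling the diagonal
     block a and the off-diagonal block b. */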
PetscErrorCode ierr; 1280 PetscInt nv,m,n; 1281 PetscBool flg; 1282 1283 PetscFunctionBegin; 1284 if (ll != rr){ 1285 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1286 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1287 } 1288 if (!ll) PetscFunctionReturn(0); 1289 1290 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1291 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1292 1293 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1294 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1295 1296 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1297 1298 /* left diagonalscale the off-diagonal part */ 1299 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1300 1301 /* scale the diagonal part */ 1302 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1303 1304 /* right diagonalscale the off-diagonal part */ 1305 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1306 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1313 { 1314 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatEqual_MPISBAIJ" 1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1327 { 1328 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1329 Mat a,b,c,d; 1330 PetscBool flg; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 a = matA->A; b = matA->B; 1335 c = matB->A; d = matB->B; 1336 1337 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1338 if (flg) { 1339 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1340 } 1341 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatCopy_MPISBAIJ" 1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1348 { 1349 PetscErrorCode ierr; 1350 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1351 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1352 1353 PetscFunctionBegin; 1354 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1355 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1356 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1357 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1358 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1359 } else { 1360 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1361 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1368 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1369 { 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1379 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1380 { 1381 PetscErrorCode ierr; 1382 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1383 PetscBLASInt bnz,one=1; 1384 Mat_SeqSBAIJ *xa,*ya; 1385 Mat_SeqBAIJ *xb,*yb; 1386 1387 PetscFunctionBegin; 1388 if (str == SAME_NONZERO_PATTERN) { 1389 PetscScalar alpha = a; 1390 xa = (Mat_SeqSBAIJ *)xx->A->data; 1391 ya = (Mat_SeqSBAIJ *)yy->A->data; 1392 bnz = PetscBLASIntCast(xa->nz); 1393 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1394 xb = (Mat_SeqBAIJ *)xx->B->data; 1395 yb = (Mat_SeqBAIJ *)yy->B->data; 1396 bnz = PetscBLASIntCast(xb->nz); 1397 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1398 } else { 1399 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1400 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1401 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1408 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1409 { 1410 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1411 PetscInt rbs,cbs; 1412 PetscErrorCode ierr; 1413 1414 PetscFunctionBegin; 1415 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1416 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1417 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1418 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1419 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1420 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1426 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1427 { 1428 PetscErrorCode ierr; 1429 PetscInt i; 1430 PetscBool flg; 1431 1432 PetscFunctionBegin; 1433 for (i=0; i<n; i++) { 1434 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1435 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1436 } 1437 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 1442 /* -------------------------------------------------------------------*/ 1443 static struct _MatOps MatOps_Values = { 1444 MatSetValues_MPISBAIJ, 1445 MatGetRow_MPISBAIJ, 1446 MatRestoreRow_MPISBAIJ, 1447 MatMult_MPISBAIJ, 1448 /* 4*/ MatMultAdd_MPISBAIJ, 1449 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1450 MatMultAdd_MPISBAIJ, 1451 0, 1452 0, 1453 0, 1454 /*10*/ 0, 1455 
0, 1456 0, 1457 MatSOR_MPISBAIJ, 1458 MatTranspose_MPISBAIJ, 1459 /*15*/ MatGetInfo_MPISBAIJ, 1460 MatEqual_MPISBAIJ, 1461 MatGetDiagonal_MPISBAIJ, 1462 MatDiagonalScale_MPISBAIJ, 1463 MatNorm_MPISBAIJ, 1464 /*20*/ MatAssemblyBegin_MPISBAIJ, 1465 MatAssemblyEnd_MPISBAIJ, 1466 MatSetOption_MPISBAIJ, 1467 MatZeroEntries_MPISBAIJ, 1468 /*24*/ 0, 1469 0, 1470 0, 1471 0, 1472 0, 1473 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1474 0, 1475 0, 1476 0, 1477 0, 1478 /*34*/ MatDuplicate_MPISBAIJ, 1479 0, 1480 0, 1481 0, 1482 0, 1483 /*39*/ MatAXPY_MPISBAIJ, 1484 MatGetSubMatrices_MPISBAIJ, 1485 MatIncreaseOverlap_MPISBAIJ, 1486 MatGetValues_MPISBAIJ, 1487 MatCopy_MPISBAIJ, 1488 /*44*/ 0, 1489 MatScale_MPISBAIJ, 1490 0, 1491 0, 1492 0, 1493 /*49*/ MatSetBlockSize_MPISBAIJ, 1494 0, 1495 0, 1496 0, 1497 0, 1498 /*54*/ 0, 1499 0, 1500 MatSetUnfactored_MPISBAIJ, 1501 0, 1502 MatSetValuesBlocked_MPISBAIJ, 1503 /*59*/ 0, 1504 0, 1505 0, 1506 0, 1507 0, 1508 /*64*/ 0, 1509 0, 1510 0, 1511 0, 1512 0, 1513 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /*74*/ 0, 1519 0, 1520 0, 1521 0, 1522 0, 1523 /*79*/ 0, 1524 0, 1525 0, 1526 0, 1527 MatLoad_MPISBAIJ, 1528 /*84*/ 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /*89*/ 0, 1534 0, 1535 0, 1536 0, 1537 0, 1538 /*94*/ 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 /*99*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /*104*/0, 1549 MatRealPart_MPISBAIJ, 1550 MatImaginaryPart_MPISBAIJ, 1551 MatGetRowUpperTriangular_MPISBAIJ, 1552 MatRestoreRowUpperTriangular_MPISBAIJ, 1553 /*109*/0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*114*/0, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*119*/0, 1564 0, 1565 0, 1566 0 1567 }; 1568 1569 1570 EXTERN_C_BEGIN 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1573 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 1574 { 1575 PetscFunctionBegin; 1576 *a = ((Mat_MPISBAIJ *)A->data)->A; 1577 *iscopy = PETSC_FALSE; 1578 PetscFunctionReturn(0); 1579 } 1580 EXTERN_C_END 1581 1582 EXTERN_C_BEGIN 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1585 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1586 { 1587 Mat_MPISBAIJ *b; 1588 PetscErrorCode ierr; 1589 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1590 1591 PetscFunctionBegin; 1592 if (bs < 0){ 1593 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1594 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1595 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1596 bs = PetscAbs(bs); 1597 } 1598 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1599 bs = newbs; 1600 1601 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1602 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1603 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1604 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1605 1606 B->rmap->bs = B->cmap->bs = bs; 1607 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1608 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1609 1610 if (d_nnz) { 1611 for (i=0; i<B->rmap->n/bs; i++) { 1612 if (d_nnz[i] < 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1613 } 1614 } 1615 if (o_nnz) { 1616 for (i=0; i<B->rmap->n/bs; i++) { 1617 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1618 } 1619 } 1620 1621 b = (Mat_MPISBAIJ*)B->data; 1622 mbs = B->rmap->n/bs; 1623 Mbs = B->rmap->N/bs; 1624 if (mbs*bs != B->rmap->n) { 1625 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs); 1626 } 1627 1628 B->rmap->bs = bs; 1629 b->bs2 = bs*bs; 1630 b->mbs = mbs; 1631 b->nbs = mbs; 1632 b->Mbs = Mbs; 1633 b->Nbs = Mbs; 1634 1635 for (i=0; i<=b->size; i++) { 1636 b->rangebs[i] = B->rmap->range[i]/bs; 1637 } 1638 b->rstartbs = B->rmap->rstart/bs; 1639 b->rendbs = B->rmap->rend/bs; 1640 1641 b->cstartbs = B->cmap->rstart/bs; 1642 b->cendbs = B->cmap->rend/bs; 1643 1644 if (!B->preallocated) { 1645 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1646 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1647 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1648 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1649 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1650 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1651 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1652 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1653 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1654 } 1655 1656 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1657 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1658 B->preallocated = PETSC_TRUE; 1659 PetscFunctionReturn(0); 1660 } 1661 EXTERN_C_END 1662 1663 EXTERN_C_BEGIN 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1666 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1667 { 1668 PetscInt m,rstart,cstart,cend; 1669 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1670 const PetscInt *JJ=0; 1671 PetscScalar *values=0; 1672 PetscErrorCode ierr; 1673 1674 PetscFunctionBegin; 1675 1676 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1677 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1678 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1679 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1680 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1681 m = B->rmap->n/bs; 1682 rstart = B->rmap->rstart/bs; 1683 cstart = B->cmap->rstart/bs; 1684 cend = B->cmap->rend/bs; 1685 1686 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1687 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1688 for (i=0; i<m; i++) { 1689 nz = ii[i+1] - ii[i]; 1690 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1691 nz_max = PetscMax(nz_max,nz); 1692 JJ = jj + ii[i]; 1693 for (j=0; j<nz; j++) { 1694 if (*JJ >= cstart) break; 1695 JJ++; 1696 } 1697 d = 0; 1698 for (; j<nz; j++) { 1699 if (*JJ++ >= cend) break; 1700 d++; 1701 } 1702 d_nnz[i] = d; 1703 o_nnz[i] = nz - d; 1704 } 1705 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1706 ierr =
PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1707 1708 values = (PetscScalar*)V; 1709 if (!values) { 1710 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1711 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1712 } 1713 for (i=0; i<m; i++) { 1714 PetscInt row = i + rstart; 1715 PetscInt ncols = ii[i+1] - ii[i]; 1716 const PetscInt *icols = jj + ii[i]; 1717 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1718 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1719 } 1720 1721 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1722 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 1725 PetscFunctionReturn(0); 1726 } 1727 EXTERN_C_END 1728 1729 EXTERN_C_BEGIN 1730 #if defined(PETSC_HAVE_MUMPS) 1731 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1732 #endif 1733 #if defined(PETSC_HAVE_SPOOLES) 1734 extern PetscErrorCode MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1735 #endif 1736 #if defined(PETSC_HAVE_PASTIX) 1737 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1738 #endif 1739 EXTERN_C_END 1740 1741 /*MC 1742 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1743 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1744 1745 Options Database Keys: 1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1747 1748 Level: beginner 1749 1750 .seealso: MatCreateMPISBAIJ 1751 M*/ 1752 1753 EXTERN_C_BEGIN 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatCreate_MPISBAIJ" 1756 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1757 { 1758 Mat_MPISBAIJ *b; 1759 PetscErrorCode ierr; 1760 PetscBool flg; 1761 1762 PetscFunctionBegin; 1763 1764 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1765 B->data = (void*)b; 1766 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1767 1768 B->ops->destroy = MatDestroy_MPISBAIJ; 1769 B->ops->view = MatView_MPISBAIJ; 1770 B->assembled = PETSC_FALSE; 1771 1772 B->insertmode = NOT_SET_VALUES; 1773 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1774 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1775 1776 /* build local table of row and column ownerships */ 1777 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1778 1779 /* build cache for off array entries formed */ 1780 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1781 b->donotstash = PETSC_FALSE; 1782 b->colmap = PETSC_NULL; 1783 b->garray = PETSC_NULL; 1784 b->roworiented = PETSC_TRUE; 1785 1786 /* stuff used in block assembly */ 1787 b->barray = 0; 1788 1789 /* stuff used for matrix vector multiply */ 1790 b->lvec = 0; 1791 b->Mvctx = 0; 1792 b->slvec0 = 0; 1793 b->slvec0b = 0; 1794 b->slvec1 = 0; 1795 b->slvec1a = 0; 1796 b->slvec1b = 0; 1797 b->sMvctx = 0; 1798 1799 /* stuff for MatGetRow() */ 1800 b->rowindices = 0; 1801 b->rowvalues = 0; 1802 b->getrowactive = PETSC_FALSE; 1803 1804 /* hash table stuff */ 1805 b->ht = 0; 1806 b->hd = 0; 1807 b->ht_size = 0; 1808 b->ht_flag = PETSC_FALSE; 1809 b->ht_fact = 0; 1810 b->ht_total_ct = 0; 1811 b->ht_insert_ct = 0; 1812 1813 b->in_loc = 0; 1814 b->v_loc = 0; 1815 b->n_loc = 0; 1816 ierr = 
PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1817 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1818 if (flg) { 1819 PetscReal fact = 1.39; 1820 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1821 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1822 if (fact <= 1.0) fact = 1.39; 1823 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1824 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1825 } 1826 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1827 1828 #if defined(PETSC_HAVE_PASTIX) 1829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1830 "MatGetFactor_mpisbaij_pastix", 1831 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1832 #endif 1833 #if defined(PETSC_HAVE_MUMPS) 1834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1835 "MatGetFactor_sbaij_mumps", 1836 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1837 #endif 1838 #if defined(PETSC_HAVE_SPOOLES) 1839 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1840 "MatGetFactor_mpisbaij_spooles", 1841 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1842 #endif 1843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1844 "MatStoreValues_MPISBAIJ", 1845 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1847 "MatRetrieveValues_MPISBAIJ", 1848 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1850 "MatGetDiagonalBlock_MPISBAIJ", 1851 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1853 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1854 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1856 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1857 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1858 B->symmetric = PETSC_TRUE; 1859 B->structurally_symmetric = PETSC_TRUE; 1860 B->symmetric_set = PETSC_TRUE; 1861 B->structurally_symmetric_set = PETSC_TRUE; 1862 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 EXTERN_C_END 1866 1867 /*MC 1868 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1869 1870 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1871 and MATMPISBAIJ otherwise. 1872 1873 Options Database Keys: 1874 . 
-mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1875 1876 Level: beginner 1877 1878 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1879 M*/ 1880 1881 EXTERN_C_BEGIN 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatCreate_SBAIJ" 1884 PetscErrorCode MatCreate_SBAIJ(Mat A) 1885 { 1886 PetscErrorCode ierr; 1887 PetscMPIInt size; 1888 1889 PetscFunctionBegin; 1890 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1891 if (size == 1) { 1892 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1893 } else { 1894 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 EXTERN_C_END 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1902 /*@C 1903 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1904 the user should preallocate the matrix storage by setting the parameters 1905 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1906 performance can be increased by more than a factor of 50. 1907 1908 Collective on Mat 1909 1910 Input Parameters: 1911 + A - the matrix 1912 . bs - size of block 1913 . d_nz - number of block nonzeros per block row in diagonal portion of local 1914 submatrix (same for all local rows) 1915 . d_nnz - array containing the number of block nonzeros in the various block rows 1916 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 1917 (possibly different for each block row) or PETSC_NULL. You must leave room 1918 for the diagonal entry even if it is zero. 1919 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1920 submatrix (same for all local rows). 1921 - o_nnz - array containing the number of nonzeros in the various block rows of the 1922 off-diagonal portion of the local submatrix (possibly different for 1923 each block row) or PETSC_NULL. 1924 1925 1926 Options Database Keys: 1927 . -mat_no_unroll - uses code that does not unroll the loops in the 1928 block calculations (much slower) 1929 . -mat_block_size - size of the blocks to use 1930 1931 Notes: 1932 1933 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1934 then it must be used on all processors that share the object for that argument. 1935 1936 If the *_nnz parameter is given then the *_nz parameter is ignored. 1937 1938 Storage Information: 1939 For a square global matrix we define each processor's diagonal portion 1940 to be its local rows and the corresponding columns (a square submatrix); 1941 each processor's off-diagonal portion encompasses the remainder of the 1942 local matrix (a rectangular submatrix). 1943 1944 The user can specify preallocated storage for the diagonal part of 1945 the local submatrix with either d_nz or d_nnz (not both). Set 1946 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1947 memory allocation. Likewise, specify preallocated storage for the 1948 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1949 1950 You can call MatGetInfo() to get information on how effective the preallocation was; 1951 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1952 You can also run with the option -info and look for messages with the string 1953 malloc in them to see if additional memory allocation was needed. 1954 1955 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1956 the figure below we depict these three local rows and all columns (0-11).
1957 1958 .vb 1959 0 1 2 3 4 5 6 7 8 9 10 11 1960 ------------------- 1961 row 3 | o o o d d d o o o o o o 1962 row 4 | o o o d d d o o o o o o 1963 row 5 | o o o d d d o o o o o o 1964 ------------------- 1965 .ve 1966 1967 Thus, any entries in the d locations are stored in the d (diagonal) 1968 submatrix, and any entries in the o locations are stored in the 1969 o (off-diagonal) submatrix. Note that the d matrix is stored in 1970 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 1971 1972 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1973 plus the diagonal part of the d matrix, 1974 and o_nz should indicate the number of block nonzeros per row in the upper triangular 1975 part of the o matrix. 1976 In general, for PDE problems in which most nonzeros are near the diagonal, 1977 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1978 or you will get TERRIBLE performance; see the users' manual chapter on 1979 matrices. 1980 1981 Level: intermediate 1982 1983 .keywords: matrix, block, aij, compressed row, sparse, parallel 1984 1985 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1986 @*/ 1987 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1988 { 1989 PetscErrorCode ierr; 1990 1991 PetscFunctionBegin; 1992 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatCreateMPISBAIJ" 1998 /*@C 1999 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2000 (block compressed row). For good matrix assembly performance 2001 the user should preallocate the matrix storage by setting the parameters 2002 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2003 performance can be increased by more than a factor of 50. 2004 2005 Collective on MPI_Comm 2006 2007 Input Parameters: 2008 + comm - MPI communicator 2009 . bs - size of block 2010 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2011 This value should be the same as the local size used in creating the 2012 y vector for the matrix-vector product y = Ax. 2013 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2014 This value should be the same as the local size used in creating the 2015 x vector for the matrix-vector product y = Ax. 2016 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2017 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2018 . d_nz - number of block nonzeros per block row in diagonal portion of local 2019 submatrix (same for all local rows) 2020 . d_nnz - array containing the number of block nonzeros in the various block rows 2021 in the upper triangular portion of the diagonal portion of the local submatrix 2022 (possibly different for each block row) or PETSC_NULL. 2023 You must leave room for the diagonal entry even if it is zero. 2024 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2025 submatrix (same for all local rows). 2026 - o_nnz - array containing the number of nonzeros in the various block rows of the 2027 off-diagonal portion of the local submatrix (possibly different for 2028 each block row) or PETSC_NULL.
2029 2030 Output Parameter: 2031 . A - the matrix 2032 2033 Options Database Keys: 2034 . -mat_no_unroll - uses code that does not unroll the loops in the 2035 block calculations (much slower) 2036 . -mat_block_size - size of the blocks to use 2037 . -mat_mpi - use the parallel matrix data structures even on one processor 2038 (defaults to using SeqSBAIJ format on one processor) 2039 2040 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2041 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2042 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2043 2044 Notes: 2045 The number of rows and columns must be divisible by blocksize. 2046 This matrix type does not support complex Hermitian operations. 2047 2048 The user MUST specify either the local or global matrix dimensions 2049 (possibly both). 2050 2051 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2052 then it must be used on all processors that share the object for that argument. 2053 2054 If the *_nnz parameter is given then the *_nz parameter is ignored. 2055 2056 Storage Information: 2057 For a square global matrix we define each processor's diagonal portion 2058 to be its local rows and the corresponding columns (a square submatrix); 2059 each processor's off-diagonal portion encompasses the remainder of the 2060 local matrix (a rectangular submatrix). 2061 2062 The user can specify preallocated storage for the diagonal part of 2063 the local submatrix with either d_nz or d_nnz (not both). Set 2064 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2065 memory allocation. Likewise, specify preallocated storage for the 2066 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2067 2068 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2069 the figure below we depict these three local rows and all columns (0-11). 2070 2071 .vb 2072 0 1 2 3 4 5 6 7 8 9 10 11 2073 ------------------- 2074 row 3 | o o o d d d o o o o o o 2075 row 4 | o o o d d d o o o o o o 2076 row 5 | o o o d d d o o o o o o 2077 ------------------- 2078 .ve 2079 2080 Thus, any entries in the d locations are stored in the d (diagonal) 2081 submatrix, and any entries in the o locations are stored in the 2082 o (off-diagonal) submatrix. Note that the d matrix is stored in 2083 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 2084 2085 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2086 plus the diagonal part of the d matrix, 2087 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2088 In general, for PDE problems in which most nonzeros are near the diagonal, 2089 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2090 or you will get TERRIBLE performance; see the users' manual chapter on 2091 matrices.
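   Example usage (a minimal sketch; comm is assumed to be an MPI communicator and the
   dimensions and preallocation counts here are illustrative only):
.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,200,200,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      /* insert entries of the upper triangular (block) part with MatSetValues()/MatSetValuesBlocked() */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve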
2092 2093 Level: intermediate 2094 2095 .keywords: matrix, block, aij, compressed row, sparse, parallel 2096 2097 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2098 @*/ 2099 2100 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2101 { 2102 PetscErrorCode ierr; 2103 PetscMPIInt size; 2104 2105 PetscFunctionBegin; 2106 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2107 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2109 if (size > 1) { 2110 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2111 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2112 } else { 2113 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2114 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2115 } 2116 PetscFunctionReturn(0); 2117 } 2118 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2122 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2123 { 2124 Mat mat; 2125 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2126 PetscErrorCode ierr; 2127 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2128 PetscScalar *array; 2129 2130 PetscFunctionBegin; 2131 *newmat = 0; 2132 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2133 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2134 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2135 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2136 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2137 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2138 2139 mat->factortype = matin->factortype; 2140 mat->preallocated = PETSC_TRUE; 2141 mat->assembled = PETSC_TRUE; 2142 mat->insertmode = NOT_SET_VALUES; 2143 2144 a = (Mat_MPISBAIJ*)mat->data; 2145 a->bs2 = oldmat->bs2; 2146 a->mbs = oldmat->mbs; 2147 a->nbs = oldmat->nbs; 2148 a->Mbs = oldmat->Mbs; 2149 a->Nbs = oldmat->Nbs; 2150 2151 2152 a->size = oldmat->size; 2153 a->rank = oldmat->rank; 2154 a->donotstash = oldmat->donotstash; 2155 a->roworiented = oldmat->roworiented; 2156 a->rowindices = 0; 2157 a->rowvalues = 0; 2158 a->getrowactive = PETSC_FALSE; 2159 a->barray = 0; 2160 a->rstartbs = oldmat->rstartbs; 2161 a->rendbs = oldmat->rendbs; 2162 a->cstartbs = oldmat->cstartbs; 2163 a->cendbs = oldmat->cendbs; 2164 2165 /* hash table stuff */ 2166 a->ht = 0; 2167 a->hd = 0; 2168 a->ht_size = 0; 2169 a->ht_flag = oldmat->ht_flag; 2170 a->ht_fact = oldmat->ht_fact; 2171 a->ht_total_ct = 0; 2172 a->ht_insert_ct = 0; 2173 2174 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2175 if (oldmat->colmap) { 2176 #if defined (PETSC_USE_CTABLE) 2177 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2178 #else 2179 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2180 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2181 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2182 #endif 2183 } else a->colmap = 0; 2184 2185 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2186 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2187 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2188 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2189 } else a->garray = 0; 2190 2191 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2192 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2193 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2194 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2195 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2196 2197 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2198 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2199 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2200 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2201 2202 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2203 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2204 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2205 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2206 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2207 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2208 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2209 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2210 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2211 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2212 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2213 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2214 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2215 2216 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2217 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2218 a->sMvctx = oldmat->sMvctx; 2219 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2220 2221 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2222 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2223 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2224 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2225 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2226 *newmat = mat; 2227 PetscFunctionReturn(0); 2228 } 2229 2230 #undef __FUNCT__ 2231 #define __FUNCT__ "MatLoad_MPISBAIJ" 2232 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2233 { 2234 PetscErrorCode ierr; 2235 PetscInt i,nz,j,rstart,rend; 2236 PetscScalar *vals,*buf; 2237 MPI_Comm comm = ((PetscObject)viewer)->comm; 2238 MPI_Status status; 2239 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2240 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2241 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2242 PetscInt bs=1,Mbs,mbs,extra_rows; 2243 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2244 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2245 int fd; 2246 2247 PetscFunctionBegin; 2248 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2249 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2250 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2251 2252 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2253 ierr = 
MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2254 if (!rank) { 2255 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2256 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2257 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2258 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2259 } 2260 2261 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2262 2263 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2264 M = header[1]; N = header[2]; 2265 2266 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2267 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2268 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2269 2270 /* If global sizes are set, check if they are consistent with that given in the file */ 2271 if (sizesset) { 2272 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2273 } 2274 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2275 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2276 2277 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2278 2279 /* 2280 This code adds extra rows to make sure the number of rows is 2281 divisible by the blocksize 2282 */ 2283 Mbs = M/bs; 2284 extra_rows = bs - M + bs*(Mbs); 2285 if (extra_rows == bs) extra_rows = 0; 2286 else Mbs++; 2287 if (extra_rows &&!rank) { 2288 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2289 } 2290 2291 /* determine ownership of all rows */ 2292 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2293 mbs = Mbs/size + ((Mbs % size) > rank); 2294 m = mbs*bs; 2295 } else { /* User Set */ 2296 m = newmat->rmap->n; 2297 mbs = m/bs; 2298 } 2299 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2300 mmbs = PetscMPIIntCast(mbs); 2301 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2302 rowners[0] = 0; 2303 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2304 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2305 rstart = rowners[rank]; 2306 rend = rowners[rank+1]; 2307 2308 /* distribute row lengths to all processors */ 2309 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2310 if (!rank) { 2311 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2312 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2313 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2314 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2315 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2316 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2317 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2318 } else { 2319 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2320 } 2321 2322 if (!rank) { /* procs[0] */ 2323 /* calculate the number of nonzeros on each processor */ 2324 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2325 
ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2326 for (i=0; i<size; i++) { 2327 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2328 procsnz[i] += rowlengths[j]; 2329 } 2330 } 2331 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2332 2333 /* determine max buffer needed and allocate it */ 2334 maxnz = 0; 2335 for (i=0; i<size; i++) { 2336 maxnz = PetscMax(maxnz,procsnz[i]); 2337 } 2338 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2339 2340 /* read in my part of the matrix column indices */ 2341 nz = procsnz[0]; 2342 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2343 mycols = ibuf; 2344 if (size == 1) nz -= extra_rows; 2345 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2346 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2347 2348 /* read in every ones (except the last) and ship off */ 2349 for (i=1; i<size-1; i++) { 2350 nz = procsnz[i]; 2351 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2352 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2353 } 2354 /* read in the stuff for the last proc */ 2355 if (size != 1) { 2356 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2357 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2358 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2359 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2360 } 2361 ierr = PetscFree(cols);CHKERRQ(ierr); 2362 } else { /* procs[i], i>0 */ 2363 /* determine buffer space needed for message */ 2364 nz = 0; 2365 for (i=0; i<m; i++) { 2366 nz += locrowlens[i]; 2367 } 2368 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2369 mycols = ibuf; 2370 /* receive message of column indices*/ 2371 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2372 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2373 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2374 } 2375 2376 /* loop over local rows, determining number of off diagonal entries */ 2377 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2378 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2379 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2380 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2381 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2382 rowcount = 0; 2383 nzcount = 0; 2384 for (i=0; i<mbs; i++) { 2385 dcount = 0; 2386 odcount = 0; 2387 for (j=0; j<bs; j++) { 2388 kmax = locrowlens[rowcount]; 2389 for (k=0; k<kmax; k++) { 2390 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2391 if (!mask[tmp]) { 2392 mask[tmp] = 1; 2393 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2394 else masked1[dcount++] = tmp; /* entry in diag portion */ 2395 } 2396 } 2397 rowcount++; 2398 } 2399 2400 dlens[i] = dcount; /* d_nzz[i] */ 2401 odlens[i] = odcount; /* o_nzz[i] */ 2402 2403 /* zero out the mask elements we set */ 2404 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2405 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2406 } 2407 if (!sizesset) { 2408 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2409 } 2410 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2411 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2412 2413 if (!rank) { 2414 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2415 /* read in my part of the matrix numerical values */ 2416 nz = procsnz[0]; 2417 vals = buf; 2418 mycols = ibuf; 2419 if (size == 1) nz -= extra_rows; 2420 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2421 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2422 2423 /* insert into matrix */ 2424 jj = rstart*bs; 2425 for (i=0; i<m; i++) { 2426 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2427 mycols += locrowlens[i]; 2428 vals += locrowlens[i]; 2429 jj++; 2430 } 2431 2432 /* read in other processors (except the last one) and ship out */ 2433 for (i=1; i<size-1; i++) { 2434 nz = procsnz[i]; 2435 vals = buf; 2436 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2437 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2438 } 2439 /* the last proc */ 2440 if (size != 1){ 2441 nz = procsnz[i] - extra_rows; 2442 vals = buf; 2443 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2444 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2445 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2446 } 2447 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2448 2449 } else { 2450 /* receive numeric values */ 2451 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2452 2453 /* receive message of values*/ 2454 vals = buf; 2455 mycols = ibuf; 2456 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2457 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2458 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2459 2460 /* insert into matrix */ 2461 jj = rstart*bs; 2462 for (i=0; i<m; i++) { 2463 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2464 mycols += locrowlens[i]; 2465 vals += locrowlens[i]; 2466 jj++; 2467 } 2468 } 2469 2470 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2471 ierr = PetscFree(buf);CHKERRQ(ierr); 2472 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2473 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2474 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2475 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2476 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2477 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2483 /*XXXXX@ 2484 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
2485 2486 Input Parameters: 2487 . mat - the matrix 2488 . fact - factor 2489 2490 Not Collective on Mat, each process can have a different hash factor 2491 2492 Level: advanced 2493 2494 Notes: 2495 This can also be set by the command line option: -mat_use_hash_table fact 2496 2497 .keywords: matrix, hashtable, factor, HT 2498 2499 .seealso: MatSetOption() 2500 @XXXXX*/ 2501 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2505 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2506 { 2507 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2508 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2509 PetscReal atmp; 2510 PetscReal *work,*svalues,*rvalues; 2511 PetscErrorCode ierr; 2512 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2513 PetscMPIInt rank,size; 2514 PetscInt *rowners_bs,dest,count,source; 2515 PetscScalar *va; 2516 MatScalar *ba; 2517 MPI_Status stat; 2518 2519 PetscFunctionBegin; 2520 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2521 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2522 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2523 2524 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2525 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2526 2527 bs = A->rmap->bs; 2528 mbs = a->mbs; 2529 Mbs = a->Mbs; 2530 ba = b->a; 2531 bi = b->i; 2532 bj = b->j; 2533 2534 /* find ownerships */ 2535 rowners_bs = A->rmap->range; 2536 2537 /* each proc creates an array to be distributed */ 2538 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2539 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2540 2541 /* row_max for B */ 2542 if (rank != size-1){ 2543 for (i=0; i<mbs; i++) { 2544 ncols = bi[1] - bi[0]; bi++; 2545 brow = bs*i; 2546 for (j=0; j<ncols; j++){ 2547 bcol = bs*(*bj); 2548 for (kcol=0; kcol<bs; kcol++){ 2549 col = bcol + kcol; /* local col index */ 2550 col += rowners_bs[rank+1]; /* global col index */ 2551 for (krow=0; krow<bs; krow++){ 2552 atmp = PetscAbsScalar(*ba); ba++; 2553 row = brow + krow; /* local row index */ 2554 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2555 if (work[col] < atmp) work[col] = atmp; 2556 } 2557 } 2558 bj++; 2559 } 2560 } 2561 2562 /* send values to its owners */ 2563 for (dest=rank+1; dest<size; dest++){ 2564 svalues = work + rowners_bs[dest]; 2565 count = rowners_bs[dest+1]-rowners_bs[dest]; 2566 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2567 } 2568 } 2569 2570 /* receive values */ 2571 if (rank){ 2572 rvalues = work; 2573 count = rowners_bs[rank+1]-rowners_bs[rank]; 2574 for (source=0; source<rank; source++){ 2575 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2576 /* process values */ 2577 for (i=0; i<count; i++){ 2578 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2579 } 2580 } 2581 } 2582 2583 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2584 ierr = PetscFree(work);CHKERRQ(ierr); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 #undef __FUNCT__ 2589 #define __FUNCT__ "MatSOR_MPISBAIJ" 2590 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2591 { 2592 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2593 PetscErrorCode ierr; 2594 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2595 PetscScalar *x,*ptr,*from; 2596 Vec bb1; 2597 const PetscScalar *b; 2598 2599 
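  /* SOR/SSOR relaxation for MPISBAIJ (only block size 1 is supported): for the local symmetric
     sweeps the coupling to off-process unknowns through B and B^T is folded into the local
     right-hand side via the shared slvec0/slvec1 scatter, and the local diagonal block A is then
     relaxed; SOR_APPLY_UPPER and an Eisenstat variant are handled separately below. */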
PetscFunctionBegin; 2600 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2601 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2602 2603 if (flag == SOR_APPLY_UPPER) { 2604 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2605 PetscFunctionReturn(0); 2606 } 2607 2608 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2609 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2610 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2611 its--; 2612 } 2613 2614 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2615 while (its--){ 2616 2617 /* lower triangular part: slvec0b = - B^T*xx */ 2618 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2619 2620 /* copy xx into slvec0a */ 2621 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2622 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2623 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2624 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2625 2626 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2627 2628 /* copy bb into slvec1a */ 2629 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2630 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2631 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2632 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2633 2634 /* set slvec1b = 0 */ 2635 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2636 2637 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2638 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2639 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2640 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2641 2642 /* upper triangular part: bb1 = bb1 - B*x */ 2643 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2644 2645 /* local diagonal sweep */ 2646 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2647 } 2648 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2649 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2650 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2651 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2653 } else if (flag & SOR_EISENSTAT) { 2654 Vec xx1; 2655 PetscBool hasop; 2656 const PetscScalar *diag; 2657 PetscScalar *sl,scale = (omega - 2.0)/omega; 2658 PetscInt i,n; 2659 2660 if (!mat->xx1) { 2661 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2662 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2663 } 2664 xx1 = mat->xx1; 2665 bb1 = mat->bb1; 2666 2667 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2668 2669 if (!mat->diag) { 2670 /* this is wrong for same matrix with new nonzero values */ 2671 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2672 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2673 } 2674 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2675 2676 if (hasop) { 2677 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2678 ierr = 
VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2679 } else { 2680 /* 2681 These two lines are replaced by code that may be a bit faster for a good compiler 2682 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2683 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2684 */ 2685 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2686 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2687 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2688 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2689 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2690 if (omega == 1.0) { 2691 for (i=0; i<n; i++) { 2692 sl[i] = b[i] - diag[i]*x[i]; 2693 } 2694 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2695 } else { 2696 for (i=0; i<n; i++) { 2697 sl[i] = b[i] + scale*diag[i]*x[i]; 2698 } 2699 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2700 } 2701 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2702 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2703 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2704 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2705 } 2706 2707 /* multiply off-diagonal portion of matrix */ 2708 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2709 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2710 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2712 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2713 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2714 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2715 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2716 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2718 2719 /* local sweep */ 2720 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2721 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2722 } else { 2723 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2724 } 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2730 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2731 { 2732 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2733 PetscErrorCode ierr; 2734 Vec lvec1,bb1; 2735 2736 PetscFunctionBegin; 2737 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2738 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2739 2740 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2741 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2742 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2743 its--; 2744 } 2745 2746 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2747 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2748 while (its--){ 2749 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2750 2751 /* lower diagonal part: bb1 = bb - B^T*xx */ 2752 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2753 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2754 2755 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2756 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2757 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2758 2759 /* upper diagonal part: bb1 = bb1 - B*x */ 2760 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2761 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2762 2763 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2764 2765 /* diagonal sweep */ 2766 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2767 } 2768 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2769 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2770 } else { 2771 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2772 } 2773 PetscFunctionReturn(0); 2774 } 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2778 /*@ 2779 MatCreateMPISBAIJWithArrays - Creates an MPI SBAIJ matrix using arrays that contain the local rows 2780 in standard CSR format. 2781 2782 Collective on MPI_Comm 2783 2784 Input Parameters: 2785 + comm - MPI communicator 2786 . bs - the block size, only a block size of 1 is supported 2787 . m - number of local rows (Cannot be PETSC_DECIDE) 2788 . n - This value should be the same as the local size used in creating the 2789 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2790 it calculated if N is given) For square matrices n is almost always m. 2791 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2792 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2793 . i - row indices 2794 . j - column indices 2795 - a - matrix values 2796 2797 Output Parameter: 2798 . mat - the matrix 2799 2800 Level: intermediate 2801 2802 Notes: 2803 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2804 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2805 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2806 2807 The i and j indices are 0 based, and the i indices are offsets into the local j array.
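   Example (a sketch with illustrative data for the process that owns global rows 0 and 1;
   only the upper triangular entries of each local row are passed, with bs = 1):
.vb
      PetscInt    i[] = {0,2,3};          /* row 0 has 2 entries, row 1 has 1 entry */
      PetscInt    j[] = {0,1,1};          /* global column indices                  */
      PetscScalar a[] = {4.0,-1.0,4.0};   /* the corresponding values               */
      Mat         A;
      ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve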
2808 2809 .keywords: matrix, aij, compressed row, sparse, parallel 2810 2811 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2812 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2813 @*/ 2814 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2815 { 2816 PetscErrorCode ierr; 2817 2818 2819 PetscFunctionBegin; 2820 if (i[0]) { 2821 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2822 } 2823 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2824 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2825 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2826 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2827 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 2832 #undef __FUNCT__ 2833 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2834 /*@C 2835 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format 2836 from a CSR description of the local rows. 2837 2838 Collective on MPI_Comm 2839 2840 Input Parameters: 2841 + B - the matrix 2842 . bs - the block size 2843 . i - the indices into j for the start of each local row (starts with zero) 2844 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2845 - v - optional values in the matrix 2846 2847 Level: developer 2848 2849 .keywords: matrix, aij, compressed row, sparse, parallel 2850 2851 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2852 @*/ 2853 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2854 { 2855 PetscErrorCode ierr; 2856 2857 PetscFunctionBegin; 2858 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 2863
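/*
   Example of MatMPISBAIJSetPreallocationCSR() usage (a sketch with illustrative data; B is assumed
   to be an unassembled MATMPISBAIJ with block size 1 whose two local block rows have upper
   triangular entries in global block columns {0,1} and {1} respectively):

      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,1,1};
      PetscScalar v[] = {4.0,-1.0,4.0};
      ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
*/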