1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash){ 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs){ 189 if (a->ignore_ltriangular){ 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 200 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 201 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 202 } else if (in[j] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 205 #endif 206 else { /* off-diag entry (B) */ 207 if (mat->was_assembled) { 208 if (!baij->colmap) { 209 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 210 } 211 #if defined (PETSC_USE_CTABLE) 212 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 213 col = col - 1; 214 #else 215 col = baij->colmap[in[j]/bs] - 1; 216 #endif 217 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 218 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 219 col = in[j]; 220 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 221 B = baij->B; 222 b = (Mat_SeqBAIJ*)(B)->data; 223 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 224 ba=b->a; 225 } else col += in[j]%bs; 226 } else col = in[j]; 227 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = (m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 349 if (!baij->donotstash) { 350 if (roworiented) { 351 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 352 } else { 353 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 354 } 355 } 356 } 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatGetValues_MPISBAIJ" 363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 364 { 365 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 366 PetscErrorCode ierr; 367 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 368 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 369 370 PetscFunctionBegin; 371 for (i=0; i<m; i++) { 372 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 373 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 374 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 375 row = idxm[i] - bsrstart; 376 for (j=0; j<n; j++) { 377 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 378 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 379 if (idxn[j] >= bscstart && idxn[j] < bscend){ 380 col = idxn[j] - bscstart; 381 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 382 } else { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 388 data --; 389 #else 390 data = baij->colmap[idxn[j]/bs]-1; 391 #endif 392 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 393 else { 394 col = data + idxn[j]%bs; 395 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 396 } 397 } 398 } 399 } else { 400 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatNorm_MPISBAIJ" 408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 409 { 410 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscReal sum[2],*lnorm2; 413 414 PetscFunctionBegin; 415 if (baij->size == 1) { 416 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 417 } else { 418 if (type == NORM_FROBENIUS) { 419 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 420 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 421 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 422 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 423 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 424 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 425 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 426 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 427 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 428 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 429 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 430 PetscReal *rsum,*rsum2,vabs; 431 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 432 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 433 MatScalar *v; 434 435 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 436 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 437 /* Amat */ 438 v = amat->a; jj = amat->j; 439 for (brow=0; brow<mbs; brow++) { 440 grow = bs*(rstart + brow); 441 nz = amat->i[brow+1] - amat->i[brow]; 442 for (bcol=0; bcol<nz; bcol++){ 443 gcol = bs*(rstart + *jj); jj++; 444 for (col=0; col<bs; col++){ 445 for (row=0; row<bs; row++){ 446 vabs = PetscAbsScalar(*v); v++; 447 rsum[gcol+col] += vabs; 448 /* non-diagonal block */ 449 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 450 } 451 } 452 } 453 } 454 /* Bmat */ 455 v = bmat->a; jj = bmat->j; 456 for (brow=0; brow<mbs; brow++) { 457 grow = bs*(rstart + brow); 458 nz = bmat->i[brow+1] - bmat->i[brow]; 459 for (bcol=0; bcol<nz; bcol++){ 460 gcol = bs*garray[*jj]; jj++; 461 for (col=0; col<bs; col++){ 462 for (row=0; row<bs; row++){ 463 vabs = PetscAbsScalar(*v); v++; 464 rsum[gcol+col] += vabs; 465 rsum[grow+row] += vabs; 466 } 467 } 468 } 469 } 470 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 471 *norm = 0.0; 472 for (col=0; col<mat->cmap->N; col++) { 473 if (rsum2[col] > *norm) *norm = rsum2[col]; 474 } 475 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 476 } else { 477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 486 { 487 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 488 PetscErrorCode ierr; 489 PetscInt nstash,reallocs; 490 InsertMode addv; 491 492 PetscFunctionBegin; 493 if (baij->donotstash || mat->nooffprocentries) { 494 PetscFunctionReturn(0); 495 } 496 497 /* make sure all processors are either in INSERTMODE or ADDMODE */ 498 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 499 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 500 mat->insertmode = addv; /* in case this processor had no cache */ 501 502 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 503 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 504 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 505 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 506 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 507 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 514 { 515 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 516 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 517 PetscErrorCode ierr; 518 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 519 PetscInt *row,*col; 520 PetscBool other_disassembled; 521 PetscMPIInt n; 522 PetscBool r1,r2,r3; 523 MatScalar *val; 524 InsertMode addv = mat->insertmode; 525 526 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 527 PetscFunctionBegin; 528 529 if (!baij->donotstash && !mat->nooffprocentries) { 530 while (1) { 531 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 532 if (!flg) break; 533 534 for (i=0; i<n;) { 535 /* Now identify the consecutive vals belonging to the same row */ 536 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 537 if (j < n) ncols = j-i; 538 else ncols = n-i; 539 /* Now assemble all these values with a single function call */ 540 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 541 i = j; 542 } 543 } 544 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 545 /* Now process the block-stash. Since the values are stashed column-oriented, 546 set the roworiented flag to column oriented, and after MatSetValues() 547 restore the original flags */ 548 r1 = baij->roworiented; 549 r2 = a->roworiented; 550 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 551 baij->roworiented = PETSC_FALSE; 552 a->roworiented = PETSC_FALSE; 553 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 554 while (1) { 555 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 556 if (!flg) break; 557 558 for (i=0; i<n;) { 559 /* Now identify the consecutive vals belonging to the same row */ 560 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 561 if (j < n) ncols = j-i; 562 else ncols = n-i; 563 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 564 i = j; 565 } 566 } 567 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 568 baij->roworiented = r1; 569 a->roworiented = r2; 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 595 596 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 597 baij->rowvalues = 0; 598 599 PetscFunctionReturn(0); 600 } 601 602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 606 { 607 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 608 PetscErrorCode ierr; 609 PetscInt bs = mat->rmap->bs; 610 PetscMPIInt size = baij->size,rank = baij->rank; 611 PetscBool iascii,isdraw; 612 PetscViewer sviewer; 613 PetscViewerFormat format; 614 615 PetscFunctionBegin; 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 617 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 618 if (iascii) { 619 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 621 MatInfo info; 622 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 623 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscBool isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 671 672 /* copy over the A part */ 673 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 675 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 676 677 for (i=0; i<mbs; i++) { 678 rvals[0] = bs*(baij->rstartbs + i); 679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 680 for (j=ai[i]; j<ai[i+1]; j++) { 681 col = (baij->cstartbs+aj[j])*bs; 682 for (k=0; k<bs; k++) { 683 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 684 col++; a += bs; 685 } 686 } 687 } 688 /* copy over the B part */ 689 Bloc = (Mat_SeqBAIJ*)baij->B->data; 690 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 691 for (i=0; i<mbs; i++) { 692 693 rvals[0] = bs*(baij->rstartbs + i); 694 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 695 for (j=ai[i]; j<ai[i+1]; j++) { 696 col = baij->garray[aj[j]]*bs; 697 for (k=0; k<bs; k++) { 698 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 699 col++; a += bs; 700 } 701 } 702 } 703 ierr = PetscFree(rvals);CHKERRQ(ierr); 704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 /* 707 Everyone has to call to draw the matrix since the graphics waits are 708 synchronized across all processors that share the PetscDraw object 709 */ 710 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 713 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 714 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 715 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 716 } 717 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 ierr = MatDestroy(&A);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatView_MPISBAIJ" 725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 726 { 727 PetscErrorCode ierr; 728 PetscBool iascii,isdraw,issocket,isbinary; 729 730 PetscFunctionBegin; 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 735 if (iascii || isdraw || issocket || isbinary) { 736 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 737 } else { 738 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatDestroy_MPISBAIJ" 745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 #if defined(PETSC_USE_LOG) 752 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 753 #endif 754 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 755 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 756 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 757 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 758 #if defined (PETSC_USE_CTABLE) 759 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 760 #else 761 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 762 #endif 763 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 764 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 765 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 767 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 768 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 771 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 772 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 773 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 774 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 776 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 777 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 778 #if defined(PETSC_USE_REAL_MAT_SINGLE) 779 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 780 #endif 781 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 782 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 784 ierr = PetscFree(mat->data);CHKERRQ(ierr); 785 786 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 808 /* diagonal part */ 809 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 810 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 811 812 /* subdiagonal part */ 813 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 814 815 /* copy x into the vec slvec0 */ 816 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 819 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 820 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 /* supperdiagonal part */ 826 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMult_MPISBAIJ" 832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 833 { 834 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 835 PetscErrorCode ierr; 836 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 837 PetscScalar *x,*from; 838 839 PetscFunctionBegin; 840 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 841 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 842 843 /* diagonal part */ 844 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 845 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 846 847 /* subdiagonal part */ 848 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 849 850 /* copy x into the vec slvec0 */ 851 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 854 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* supperdiagonal part */ 861 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 868 { 869 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscInt nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 876 877 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 878 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 879 880 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 /* do diagonal part */ 882 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 883 /* do supperdiagonal part */ 884 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 886 /* do subdiagonal part */ 887 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 888 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 889 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt mbs=a->mbs,bs=A->rmap->bs; 901 PetscScalar *x,*from,zero=0.0; 902 903 PetscFunctionBegin; 904 /* 905 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 906 PetscSynchronizedFlush(((PetscObject)A)->comm); 907 */ 908 /* diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 910 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 911 912 /* subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 914 915 /* copy x into the vec slvec0 */ 916 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 920 921 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 925 /* supperdiagonal part */ 926 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 /* do diagonal part */ 941 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 942 /* do supperdiagonal part */ 943 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 945 946 /* do subdiagonal part */ 947 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 948 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 949 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This only works correctly for square matrices where the subblock A->A is the 956 diagonal block 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1172 info->nz_used = irecv[0]; 1173 info->nz_allocated = irecv[1]; 1174 info->nz_unneeded = irecv[2]; 1175 info->memory = irecv[3]; 1176 info->mallocs = irecv[4]; 1177 } else { 1178 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1179 } 1180 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1181 info->fill_ratio_needed = 0; 1182 info->factor_mallocs = 0; 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1189 { 1190 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1191 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 switch (op) { 1196 case MAT_NEW_NONZERO_LOCATIONS: 1197 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1198 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1199 case MAT_KEEP_NONZERO_PATTERN: 1200 case MAT_NEW_NONZERO_LOCATION_ERR: 1201 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1203 break; 1204 case MAT_ROW_ORIENTED: 1205 a->roworiented = flg; 1206 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1208 break; 1209 case MAT_NEW_DIAGONALS: 1210 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1211 break; 1212 case MAT_IGNORE_OFF_PROC_ENTRIES: 1213 a->donotstash = flg; 1214 break; 1215 case MAT_USE_HASH_TABLE: 1216 a->ht_flag = flg; 1217 break; 1218 case MAT_HERMITIAN: 1219 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1220 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1221 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1222 break; 1223 case MAT_SPD: 1224 A->spd_set = PETSC_TRUE; 1225 A->spd = flg; 1226 if (flg) { 1227 A->symmetric = PETSC_TRUE; 1228 A->structurally_symmetric = PETSC_TRUE; 1229 A->symmetric_set = PETSC_TRUE; 1230 A->structurally_symmetric_set = PETSC_TRUE; 1231 } 1232 break; 1233 case MAT_SYMMETRIC: 1234 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1235 break; 1236 case MAT_STRUCTURALLY_SYMMETRIC: 1237 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1238 break; 1239 case MAT_SYMMETRY_ETERNAL: 1240 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1241 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1242 break; 1243 case MAT_IGNORE_LOWER_TRIANGULAR: 1244 aA->ignore_ltriangular = flg; 1245 break; 1246 case MAT_ERROR_LOWER_TRIANGULAR: 1247 aA->ignore_ltriangular = flg; 1248 break; 1249 case MAT_GETROW_UPPERTRIANGULAR: 1250 aA->getrow_utriangular = flg; 1251 break; 1252 default: 1253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1260 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1261 { 1262 PetscErrorCode ierr; 1263 PetscFunctionBegin; 1264 if (MAT_INITIAL_MATRIX || *B != A) { 1265 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1272 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1273 { 1274 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1275 Mat a=baij->A, b=baij->B; 1276 PetscErrorCode ierr; 1277 PetscInt nv,m,n; 1278 PetscBool flg; 1279 1280 PetscFunctionBegin; 1281 if (ll != rr){ 1282 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1283 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1284 } 1285 if (!ll) PetscFunctionReturn(0); 1286 1287 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1288 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1289 1290 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1291 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1292 1293 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 1295 /* left diagonalscale the off-diagonal part */ 1296 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1297 1298 /* scale the diagonal part */ 1299 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1300 1301 /* right diagonalscale the off-diagonal part */ 1302 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1309 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1310 { 1311 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatEqual_MPISBAIJ" 1323 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1324 { 1325 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1326 Mat a,b,c,d; 1327 PetscBool flg; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 a = matA->A; b = matA->B; 1332 c = matB->A; d = matB->B; 1333 1334 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1335 if (flg) { 1336 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1337 } 1338 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCopy_MPISBAIJ" 1344 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1345 { 1346 PetscErrorCode ierr; 1347 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1348 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1349 1350 PetscFunctionBegin; 1351 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1352 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1353 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1354 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1355 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1356 } else { 1357 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1358 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1359 } 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1365 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1366 { 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1377 { 1378 PetscErrorCode ierr; 1379 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1380 PetscBLASInt bnz,one=1; 1381 Mat_SeqSBAIJ *xa,*ya; 1382 Mat_SeqBAIJ *xb,*yb; 1383 1384 PetscFunctionBegin; 1385 if (str == SAME_NONZERO_PATTERN) { 1386 PetscScalar alpha = a; 1387 xa = (Mat_SeqSBAIJ *)xx->A->data; 1388 ya = (Mat_SeqSBAIJ *)yy->A->data; 1389 bnz = PetscBLASIntCast(xa->nz); 1390 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1391 xb = (Mat_SeqBAIJ *)xx->B->data; 1392 yb = (Mat_SeqBAIJ *)yy->B->data; 1393 bnz = PetscBLASIntCast(xb->nz); 1394 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1395 } else { 1396 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1397 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1398 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1406 { 1407 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1408 PetscInt rbs,cbs; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1413 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1414 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1416 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1417 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1424 { 1425 PetscErrorCode ierr; 1426 PetscInt i; 1427 PetscBool flg; 1428 1429 PetscFunctionBegin; 1430 for (i=0; i<n; i++) { 1431 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1432 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1433 } 1434 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 1439 /* -------------------------------------------------------------------*/ 1440 static struct _MatOps MatOps_Values = { 1441 MatSetValues_MPISBAIJ, 1442 MatGetRow_MPISBAIJ, 1443 MatRestoreRow_MPISBAIJ, 1444 MatMult_MPISBAIJ, 1445 /* 4*/ MatMultAdd_MPISBAIJ, 1446 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1447 MatMultAdd_MPISBAIJ, 1448 0, 1449 0, 1450 0, 1451 /*10*/ 0, 1452 0, 1453 0, 1454 MatSOR_MPISBAIJ, 1455 MatTranspose_MPISBAIJ, 1456 /*15*/ MatGetInfo_MPISBAIJ, 1457 MatEqual_MPISBAIJ, 1458 MatGetDiagonal_MPISBAIJ, 1459 MatDiagonalScale_MPISBAIJ, 1460 MatNorm_MPISBAIJ, 1461 /*20*/ MatAssemblyBegin_MPISBAIJ, 1462 MatAssemblyEnd_MPISBAIJ, 1463 MatSetOption_MPISBAIJ, 1464 MatZeroEntries_MPISBAIJ, 1465 /*24*/ 0, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1471 0, 1472 0, 1473 0, 1474 0, 1475 /*34*/ MatDuplicate_MPISBAIJ, 1476 0, 1477 0, 1478 0, 1479 0, 1480 /*39*/ MatAXPY_MPISBAIJ, 1481 MatGetSubMatrices_MPISBAIJ, 1482 MatIncreaseOverlap_MPISBAIJ, 1483 MatGetValues_MPISBAIJ, 1484 MatCopy_MPISBAIJ, 1485 /*44*/ 0, 1486 MatScale_MPISBAIJ, 1487 0, 1488 0, 1489 0, 1490 /*49*/ MatSetBlockSize_MPISBAIJ, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*54*/ 0, 1496 0, 1497 MatSetUnfactored_MPISBAIJ, 1498 0, 1499 MatSetValuesBlocked_MPISBAIJ, 1500 /*59*/ 0, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*64*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*74*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*79*/ 0, 1521 0, 1522 0, 1523 0, 1524 MatLoad_MPISBAIJ, 1525 /*84*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*89*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*94*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*99*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*104*/0, 1546 MatRealPart_MPISBAIJ, 1547 MatImaginaryPart_MPISBAIJ, 1548 MatGetRowUpperTriangular_MPISBAIJ, 1549 MatRestoreRowUpperTriangular_MPISBAIJ, 1550 /*109*/0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*114*/0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*119*/0, 1561 0, 1562 0, 1563 0 1564 }; 1565 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1570 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1571 { 1572 PetscFunctionBegin; 1573 *a = ((Mat_MPISBAIJ *)A->data)->A; 1574 PetscFunctionReturn(0); 1575 } 1576 EXTERN_C_END 1577 1578 EXTERN_C_BEGIN 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1581 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1582 { 1583 Mat_MPISBAIJ *b; 1584 PetscErrorCode ierr; 1585 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1586 1587 PetscFunctionBegin; 1588 if (bs < 0){ 1589 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1590 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1591 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1592 bs = PetscAbs(bs); 1593 } 1594 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1595 bs = newbs; 1596 1597 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1598 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1599 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1600 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1601 1602 B->rmap->bs = B->cmap->bs = bs; 1603 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1604 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1605 1606 if (d_nnz) { 1607 for (i=0; i<B->rmap->n/bs; i++) { 1608 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1609 } 1610 } 1611 if (o_nnz) { 1612 for (i=0; i<B->rmap->n/bs; i++) { 1613 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1614 } 1615 } 1616 1617 b = (Mat_MPISBAIJ*)B->data; 1618 mbs = B->rmap->n/bs; 1619 Mbs = B->rmap->N/bs; 1620 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1621 1622 B->rmap->bs = bs; 1623 b->bs2 = bs*bs; 1624 b->mbs = mbs; 1625 b->nbs = mbs; 1626 b->Mbs = Mbs; 1627 b->Nbs = Mbs; 1628 1629 for (i=0; i<=b->size; i++) { 1630 b->rangebs[i] = B->rmap->range[i]/bs; 1631 } 1632 b->rstartbs = B->rmap->rstart/bs; 1633 b->rendbs = B->rmap->rend/bs; 1634 1635 b->cstartbs = B->cmap->rstart/bs; 1636 b->cendbs = B->cmap->rend/bs; 1637 1638 if (!B->preallocated) { 1639 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1640 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1641 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1642 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1643 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1644 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1645 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1646 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1647 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1648 } 1649 1650 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1651 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1652 B->preallocated = PETSC_TRUE; 1653 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 EXTERN_C_END 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1661 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1662 { 1663 PetscInt m,rstart,cstart,cend; 1664 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1665 const PetscInt *JJ=0; 1666 PetscScalar *values=0; 1667 PetscErrorCode ierr; 1668 1669 PetscFunctionBegin; 1670 1671 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1672 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1673 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1674 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1675 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1676 m = B->rmap->n/bs; 1677 rstart = B->rmap->rstart/bs; 1678 cstart = B->cmap->rstart/bs; 1679 cend = B->cmap->rend/bs; 1680 1681 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1682 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1683 for (i=0; i<m; i++) { 1684 nz = ii[i+1] - ii[i]; 1685 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1686 nz_max = PetscMax(nz_max,nz); 1687 JJ = jj + ii[i]; 1688 for (j=0; j<nz; j++) { 1689 if (*JJ >= cstart) break; 1690 JJ++; 1691 } 1692 d = 0; 1693 for (; j<nz; j++) { 1694 if (*JJ++ >= cend) break; 1695 d++; 1696 } 1697 d_nnz[i] = d; 1698 o_nnz[i] = nz - d; 1699 } 1700 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1701 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1702 1703 values = (PetscScalar*)V; 1704 if (!values) { 1705 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1706 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1707 } 1708 for (i=0; i<m; i++) { 1709 PetscInt row = i + rstart; 1710 PetscInt ncols = ii[i+1] - ii[i]; 1711 const PetscInt *icols = jj + ii[i]; 1712 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1713 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1714 } 1715 1716 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1717 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1718 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 EXTERN_C_END 1723 1724 EXTERN_C_BEGIN 1725 #if defined(PETSC_HAVE_MUMPS) 1726 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1727 #endif 1728 #if defined(PETSC_HAVE_SPOOLES) 1729 extern PetscErrorCode MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1730 #endif 1731 #if defined(PETSC_HAVE_PASTIX) 1732 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1733 #endif 1734 EXTERN_C_END 1735 1736 /*MC 1737 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1738 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1739 the matrix is stored. 1740 1741 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1742 can call MatSetOption(Mat, MAT_HERMITIAN); 1743 1744 Options Database Keys: 1745 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1746 1747 Level: beginner 1748 1749 .seealso: MatCreateMPISBAIJ 1750 M*/ 1751 1752 EXTERN_C_BEGIN 1753 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*); 1754 EXTERN_C_END 1755 1756 EXTERN_C_BEGIN 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatCreate_MPISBAIJ" 1759 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1760 { 1761 Mat_MPISBAIJ *b; 1762 PetscErrorCode ierr; 1763 PetscBool flg; 1764 1765 PetscFunctionBegin; 1766 1767 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1768 B->data = (void*)b; 1769 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1770 1771 B->ops->destroy = MatDestroy_MPISBAIJ; 1772 B->ops->view = MatView_MPISBAIJ; 1773 B->assembled = PETSC_FALSE; 1774 1775 B->insertmode = NOT_SET_VALUES; 1776 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1777 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1778 1779 /* build local table of row and column ownerships */ 1780 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1781 1782 /* build cache for off array entries formed */ 1783 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1784 b->donotstash = PETSC_FALSE; 1785 b->colmap = PETSC_NULL; 1786 b->garray = PETSC_NULL; 1787 b->roworiented = PETSC_TRUE; 1788 1789 /* stuff used in block assembly */ 1790 b->barray = 0; 1791 1792 /* stuff used for matrix vector multiply */ 1793 b->lvec = 0; 1794 b->Mvctx = 0; 1795 b->slvec0 = 0; 1796 b->slvec0b = 0; 1797 b->slvec1 = 0; 1798 b->slvec1a = 0; 1799 b->slvec1b = 0; 1800 b->sMvctx = 0; 1801 1802 /* stuff for MatGetRow() */ 1803 b->rowindices = 0; 1804 b->rowvalues = 0; 1805 b->getrowactive = PETSC_FALSE; 1806 1807 /* hash table stuff */ 1808 b->ht = 0; 1809 b->hd = 0; 1810 b->ht_size = 0; 1811 b->ht_flag = PETSC_FALSE; 1812 b->ht_fact = 0; 1813 b->ht_total_ct = 0; 1814 b->ht_insert_ct = 0; 1815 1816 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1817 b->ijonly = PETSC_FALSE; 1818 1819 b->in_loc = 0; 1820 b->v_loc = 0; 1821 b->n_loc = 0; 1822 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1823 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1824 if (flg) { 1825 PetscReal fact = 1.39; 1826 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1827 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1828 if (fact <= 1.0) fact = 1.39; 1829 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1830 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1831 } 1832 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1833 1834 #if defined(PETSC_HAVE_PASTIX) 1835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1836 "MatGetFactor_mpisbaij_pastix", 1837 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1838 #endif 1839 #if defined(PETSC_HAVE_MUMPS) 1840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1841 "MatGetFactor_sbaij_mumps", 1842 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1843 #endif 1844 #if defined(PETSC_HAVE_SPOOLES) 1845 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1846 "MatGetFactor_mpisbaij_spooles", 1847 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1848 #endif 1849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1850 "MatStoreValues_MPISBAIJ", 1851 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1853 "MatRetrieveValues_MPISBAIJ", 1854 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1856 "MatGetDiagonalBlock_MPISBAIJ", 1857 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1858 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1859 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1860 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1861 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1862 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1863 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1864 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C", 1865 "MatConvert_MPISBAIJ_MPISBSTRM", 1866 MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1867 1868 B->symmetric = PETSC_TRUE; 1869 B->structurally_symmetric = PETSC_TRUE; 1870 B->symmetric_set = PETSC_TRUE; 1871 B->structurally_symmetric_set = PETSC_TRUE; 1872 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 EXTERN_C_END 1876 1877 /*MC 1878 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1879 1880 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1881 and MATMPISBAIJ otherwise. 1882 1883 Options Database Keys: 1884 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1885 1886 Level: beginner 1887 1888 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1889 M*/ 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1893 /*@C 1894 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1895 the user should preallocate the matrix storage by setting the parameters 1896 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1897 performance can be increased by more than a factor of 50. 1898 1899 Collective on Mat 1900 1901 Input Parameters: 1902 + A - the matrix 1903 . bs - size of blockk 1904 . d_nz - number of block nonzeros per block row in diagonal portion of local 1905 submatrix (same for all local rows) 1906 . d_nnz - array containing the number of block nonzeros in the various block rows 1907 in the upper triangular and diagonal part of the in diagonal portion of the local 1908 (possibly different for each block row) or PETSC_NULL. You must leave room 1909 for the diagonal entry even if it is zero. 1910 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1911 submatrix (same for all local rows). 1912 - o_nnz - array containing the number of nonzeros in the various block rows of the 1913 off-diagonal portion of the local submatrix that is right of the diagonal 1914 (possibly different for each block row) or PETSC_NULL. 1915 1916 1917 Options Database Keys: 1918 . -mat_no_unroll - uses code that does not unroll the loops in the 1919 block calculations (much slower) 1920 . -mat_block_size - size of the blocks to use 1921 1922 Notes: 1923 1924 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1925 than it must be used on all processors that share the object for that argument. 1926 1927 If the *_nnz parameter is given then the *_nz parameter is ignored 1928 1929 Storage Information: 1930 For a square global matrix we define each processor's diagonal portion 1931 to be its local rows and the corresponding columns (a square submatrix); 1932 each processor's off-diagonal portion encompasses the remainder of the 1933 local matrix (a rectangular submatrix). 1934 1935 The user can specify preallocated storage for the diagonal part of 1936 the local submatrix with either d_nz or d_nnz (not both). Set 1937 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1938 memory allocation. Likewise, specify preallocated storage for the 1939 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1940 1941 You can call MatGetInfo() to get information on how effective the preallocation was; 1942 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1943 You can also run with the option -info and look for messages with the string 1944 malloc in them to see if additional memory allocation was needed. 1945 1946 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1947 the figure below we depict these three local rows and all columns (0-11). 1948 1949 .vb 1950 0 1 2 3 4 5 6 7 8 9 10 11 1951 ------------------- 1952 row 3 | . . . d d d o o o o o o 1953 row 4 | . . . d d d o o o o o o 1954 row 5 | . . . d d d o o o o o o 1955 ------------------- 1956 .ve 1957 1958 Thus, any entries in the d locations are stored in the d (diagonal) 1959 submatrix, and any entries in the o locations are stored in the 1960 o (off-diagonal) submatrix. Note that the d matrix is stored in 1961 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1962 1963 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1964 plus the diagonal part of the d matrix, 1965 and o_nz should indicate the number of block nonzeros per row in the o matrix 1966 1967 In general, for PDE problems in which most nonzeros are near the diagonal, 1968 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1969 or you will get TERRIBLE performance; see the users' manual chapter on 1970 matrices. 1971 1972 Level: intermediate 1973 1974 .keywords: matrix, block, aij, compressed row, sparse, parallel 1975 1976 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1977 @*/ 1978 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1979 { 1980 PetscErrorCode ierr; 1981 1982 PetscFunctionBegin; 1983 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1984 PetscValidType(B,1); 1985 PetscValidLogicalCollectiveInt(B,bs,2); 1986 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatCreateMPISBAIJ" 1992 /*@C 1993 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1994 (block compressed row). For good matrix assembly performance 1995 the user should preallocate the matrix storage by setting the parameters 1996 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1997 performance can be increased by more than a factor of 50. 1998 1999 Collective on MPI_Comm 2000 2001 Input Parameters: 2002 + comm - MPI communicator 2003 . bs - size of blockk 2004 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2005 This value should be the same as the local size used in creating the 2006 y vector for the matrix-vector product y = Ax. 2007 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2008 This value should be the same as the local size used in creating the 2009 x vector for the matrix-vector product y = Ax. 2010 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2011 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2012 . d_nz - number of block nonzeros per block row in diagonal portion of local 2013 submatrix (same for all local rows) 2014 . d_nnz - array containing the number of block nonzeros in the various block rows 2015 in the upper triangular portion of the in diagonal portion of the local 2016 (possibly different for each block block row) or PETSC_NULL. 2017 You must leave room for the diagonal entry even if it is zero. 2018 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2019 submatrix (same for all local rows). 2020 - o_nnz - array containing the number of nonzeros in the various block rows of the 2021 off-diagonal portion of the local submatrix (possibly different for 2022 each block row) or PETSC_NULL. 2023 2024 Output Parameter: 2025 . A - the matrix 2026 2027 Options Database Keys: 2028 . -mat_no_unroll - uses code that does not unroll the loops in the 2029 block calculations (much slower) 2030 . -mat_block_size - size of the blocks to use 2031 . -mat_mpi - use the parallel matrix data structures even on one processor 2032 (defaults to using SeqBAIJ format on one processor) 2033 2034 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2035 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2036 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2037 2038 Notes: 2039 The number of rows and columns must be divisible by blocksize. 2040 This matrix type does not support complex Hermitian operation. 2041 2042 The user MUST specify either the local or global matrix dimensions 2043 (possibly both). 2044 2045 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2046 than it must be used on all processors that share the object for that argument. 2047 2048 If the *_nnz parameter is given then the *_nz parameter is ignored 2049 2050 Storage Information: 2051 For a square global matrix we define each processor's diagonal portion 2052 to be its local rows and the corresponding columns (a square submatrix); 2053 each processor's off-diagonal portion encompasses the remainder of the 2054 local matrix (a rectangular submatrix). 2055 2056 The user can specify preallocated storage for the diagonal part of 2057 the local submatrix with either d_nz or d_nnz (not both). Set 2058 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2059 memory allocation. Likewise, specify preallocated storage for the 2060 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2061 2062 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2063 the figure below we depict these three local rows and all columns (0-11). 2064 2065 .vb 2066 0 1 2 3 4 5 6 7 8 9 10 11 2067 ------------------- 2068 row 3 | . . . d d d o o o o o o 2069 row 4 | . . . d d d o o o o o o 2070 row 5 | . . . d d d o o o o o o 2071 ------------------- 2072 .ve 2073 2074 Thus, any entries in the d locations are stored in the d (diagonal) 2075 submatrix, and any entries in the o locations are stored in the 2076 o (off-diagonal) submatrix. Note that the d matrix is stored in 2077 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2078 2079 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2080 plus the diagonal part of the d matrix, 2081 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2082 In general, for PDE problems in which most nonzeros are near the diagonal, 2083 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2084 or you will get TERRIBLE performance; see the users' manual chapter on 2085 matrices. 2086 2087 Level: intermediate 2088 2089 .keywords: matrix, block, aij, compressed row, sparse, parallel 2090 2091 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2092 @*/ 2093 2094 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2095 { 2096 PetscErrorCode ierr; 2097 PetscMPIInt size; 2098 2099 PetscFunctionBegin; 2100 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2101 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2102 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2103 if (size > 1) { 2104 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2105 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2106 } else { 2107 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2108 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2116 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2117 { 2118 Mat mat; 2119 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2120 PetscErrorCode ierr; 2121 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2122 PetscScalar *array; 2123 2124 PetscFunctionBegin; 2125 *newmat = 0; 2126 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2127 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2128 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2129 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2130 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2131 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2132 2133 mat->factortype = matin->factortype; 2134 mat->preallocated = PETSC_TRUE; 2135 mat->assembled = PETSC_TRUE; 2136 mat->insertmode = NOT_SET_VALUES; 2137 2138 a = (Mat_MPISBAIJ*)mat->data; 2139 a->bs2 = oldmat->bs2; 2140 a->mbs = oldmat->mbs; 2141 a->nbs = oldmat->nbs; 2142 a->Mbs = oldmat->Mbs; 2143 a->Nbs = oldmat->Nbs; 2144 2145 2146 a->size = oldmat->size; 2147 a->rank = oldmat->rank; 2148 a->donotstash = oldmat->donotstash; 2149 a->roworiented = oldmat->roworiented; 2150 a->rowindices = 0; 2151 a->rowvalues = 0; 2152 a->getrowactive = PETSC_FALSE; 2153 a->barray = 0; 2154 a->rstartbs = oldmat->rstartbs; 2155 a->rendbs = oldmat->rendbs; 2156 a->cstartbs = oldmat->cstartbs; 2157 a->cendbs = oldmat->cendbs; 2158 2159 /* hash table stuff */ 2160 a->ht = 0; 2161 a->hd = 0; 2162 a->ht_size = 0; 2163 a->ht_flag = oldmat->ht_flag; 2164 a->ht_fact = oldmat->ht_fact; 2165 a->ht_total_ct = 0; 2166 a->ht_insert_ct = 0; 2167 2168 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2169 if (oldmat->colmap) { 2170 #if defined (PETSC_USE_CTABLE) 2171 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2172 #else 2173 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2174 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2175 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2176 #endif 2177 } else a->colmap = 0; 2178 2179 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2180 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2181 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2182 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2183 } else a->garray = 0; 2184 2185 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2186 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2187 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2188 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2189 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2190 2191 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2192 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2193 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2194 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2195 2196 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2197 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2198 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2199 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2200 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2201 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2202 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2203 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2204 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2205 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2206 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2207 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2208 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2209 2210 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2211 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2212 a->sMvctx = oldmat->sMvctx; 2213 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2214 2215 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2216 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2217 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2218 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2219 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2220 *newmat = mat; 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatLoad_MPISBAIJ" 2226 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2227 { 2228 PetscErrorCode ierr; 2229 PetscInt i,nz,j,rstart,rend; 2230 PetscScalar *vals,*buf; 2231 MPI_Comm comm = ((PetscObject)viewer)->comm; 2232 MPI_Status status; 2233 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2234 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2235 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2236 PetscInt bs=1,Mbs,mbs,extra_rows; 2237 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2238 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2239 int fd; 2240 2241 PetscFunctionBegin; 2242 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2243 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2244 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2245 2246 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2247 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2248 if (!rank) { 2249 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2250 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2251 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2252 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2253 } 2254 2255 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2256 2257 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2258 M = header[1]; N = header[2]; 2259 2260 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2261 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2262 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2263 2264 /* If global sizes are set, check if they are consistent with that given in the file */ 2265 if (sizesset) { 2266 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2267 } 2268 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2269 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2270 2271 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2272 2273 /* 2274 This code adds extra rows to make sure the number of rows is 2275 divisible by the blocksize 2276 */ 2277 Mbs = M/bs; 2278 extra_rows = bs - M + bs*(Mbs); 2279 if (extra_rows == bs) extra_rows = 0; 2280 else Mbs++; 2281 if (extra_rows &&!rank) { 2282 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2283 } 2284 2285 /* determine ownership of all rows */ 2286 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2287 mbs = Mbs/size + ((Mbs % size) > rank); 2288 m = mbs*bs; 2289 } else { /* User Set */ 2290 m = newmat->rmap->n; 2291 mbs = m/bs; 2292 } 2293 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2294 mmbs = PetscMPIIntCast(mbs); 2295 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2296 rowners[0] = 0; 2297 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2298 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2299 rstart = rowners[rank]; 2300 rend = rowners[rank+1]; 2301 2302 /* distribute row lengths to all processors */ 2303 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2304 if (!rank) { 2305 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2306 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2307 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2308 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2309 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2310 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2311 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2312 } else { 2313 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2314 } 2315 2316 if (!rank) { /* procs[0] */ 2317 /* calculate the number of nonzeros on each processor */ 2318 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2319 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2320 for (i=0; i<size; i++) { 2321 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2322 procsnz[i] += rowlengths[j]; 2323 } 2324 } 2325 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2326 2327 /* determine max buffer needed and allocate it */ 2328 maxnz = 0; 2329 for (i=0; i<size; i++) { 2330 maxnz = PetscMax(maxnz,procsnz[i]); 2331 } 2332 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2333 2334 /* read in my part of the matrix column indices */ 2335 nz = procsnz[0]; 2336 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2337 mycols = ibuf; 2338 if (size == 1) nz -= extra_rows; 2339 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2340 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2341 2342 /* read in every ones (except the last) and ship off */ 2343 for (i=1; i<size-1; i++) { 2344 nz = procsnz[i]; 2345 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2346 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2347 } 2348 /* read in the stuff for the last proc */ 2349 if (size != 1) { 2350 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2351 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2352 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2353 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2354 } 2355 ierr = PetscFree(cols);CHKERRQ(ierr); 2356 } else { /* procs[i], i>0 */ 2357 /* determine buffer space needed for message */ 2358 nz = 0; 2359 for (i=0; i<m; i++) { 2360 nz += locrowlens[i]; 2361 } 2362 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2363 mycols = ibuf; 2364 /* receive message of column indices*/ 2365 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2366 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2367 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2368 } 2369 2370 /* loop over local rows, determining number of off diagonal entries */ 2371 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2372 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2373 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2374 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2375 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2376 rowcount = 0; 2377 nzcount = 0; 2378 for (i=0; i<mbs; i++) { 2379 dcount = 0; 2380 odcount = 0; 2381 for (j=0; j<bs; j++) { 2382 kmax = locrowlens[rowcount]; 2383 for (k=0; k<kmax; k++) { 2384 tmp = mycols[nzcount++]/bs; /* block col. index */ 2385 if (!mask[tmp]) { 2386 mask[tmp] = 1; 2387 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2388 else masked1[dcount++] = tmp; /* entry in diag portion */ 2389 } 2390 } 2391 rowcount++; 2392 } 2393 2394 dlens[i] = dcount; /* d_nzz[i] */ 2395 odlens[i] = odcount; /* o_nzz[i] */ 2396 2397 /* zero out the mask elements we set */ 2398 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2399 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2400 } 2401 if (!sizesset) { 2402 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2403 } 2404 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2405 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2406 2407 if (!rank) { 2408 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2409 /* read in my part of the matrix numerical values */ 2410 nz = procsnz[0]; 2411 vals = buf; 2412 mycols = ibuf; 2413 if (size == 1) nz -= extra_rows; 2414 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2415 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2416 2417 /* insert into matrix */ 2418 jj = rstart*bs; 2419 for (i=0; i<m; i++) { 2420 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2421 mycols += locrowlens[i]; 2422 vals += locrowlens[i]; 2423 jj++; 2424 } 2425 2426 /* read in other processors (except the last one) and ship out */ 2427 for (i=1; i<size-1; i++) { 2428 nz = procsnz[i]; 2429 vals = buf; 2430 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2431 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2432 } 2433 /* the last proc */ 2434 if (size != 1){ 2435 nz = procsnz[i] - extra_rows; 2436 vals = buf; 2437 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2438 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2439 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2440 } 2441 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2442 2443 } else { 2444 /* receive numeric values */ 2445 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2446 2447 /* receive message of values*/ 2448 vals = buf; 2449 mycols = ibuf; 2450 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2451 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2452 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2453 2454 /* insert into matrix */ 2455 jj = rstart*bs; 2456 for (i=0; i<m; i++) { 2457 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2458 mycols += locrowlens[i]; 2459 vals += locrowlens[i]; 2460 jj++; 2461 } 2462 } 2463 2464 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2465 ierr = PetscFree(buf);CHKERRQ(ierr); 2466 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2467 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2468 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2469 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2470 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2471 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2477 /*XXXXX@ 2478 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2479 2480 Input Parameters: 2481 . mat - the matrix 2482 . fact - factor 2483 2484 Not Collective on Mat, each process can have a different hash factor 2485 2486 Level: advanced 2487 2488 Notes: 2489 This can also be set by the command line option: -mat_use_hash_table fact 2490 2491 .keywords: matrix, hashtable, factor, HT 2492 2493 .seealso: MatSetOption() 2494 @XXXXX*/ 2495 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2499 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2500 { 2501 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2502 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2503 PetscReal atmp; 2504 PetscReal *work,*svalues,*rvalues; 2505 PetscErrorCode ierr; 2506 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2507 PetscMPIInt rank,size; 2508 PetscInt *rowners_bs,dest,count,source; 2509 PetscScalar *va; 2510 MatScalar *ba; 2511 MPI_Status stat; 2512 2513 PetscFunctionBegin; 2514 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2515 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2516 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2517 2518 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2519 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2520 2521 bs = A->rmap->bs; 2522 mbs = a->mbs; 2523 Mbs = a->Mbs; 2524 ba = b->a; 2525 bi = b->i; 2526 bj = b->j; 2527 2528 /* find ownerships */ 2529 rowners_bs = A->rmap->range; 2530 2531 /* each proc creates an array to be distributed */ 2532 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2533 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2534 2535 /* row_max for B */ 2536 if (rank != size-1){ 2537 for (i=0; i<mbs; i++) { 2538 ncols = bi[1] - bi[0]; bi++; 2539 brow = bs*i; 2540 for (j=0; j<ncols; j++){ 2541 bcol = bs*(*bj); 2542 for (kcol=0; kcol<bs; kcol++){ 2543 col = bcol + kcol; /* local col index */ 2544 col += rowners_bs[rank+1]; /* global col index */ 2545 for (krow=0; krow<bs; krow++){ 2546 atmp = PetscAbsScalar(*ba); ba++; 2547 row = brow + krow; /* local row index */ 2548 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2549 if (work[col] < atmp) work[col] = atmp; 2550 } 2551 } 2552 bj++; 2553 } 2554 } 2555 2556 /* send values to its owners */ 2557 for (dest=rank+1; dest<size; dest++){ 2558 svalues = work + rowners_bs[dest]; 2559 count = rowners_bs[dest+1]-rowners_bs[dest]; 2560 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2561 } 2562 } 2563 2564 /* receive values */ 2565 if (rank){ 2566 rvalues = work; 2567 count = rowners_bs[rank+1]-rowners_bs[rank]; 2568 for (source=0; source<rank; source++){ 2569 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2570 /* process values */ 2571 for (i=0; i<count; i++){ 2572 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2573 } 2574 } 2575 } 2576 2577 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2578 ierr = PetscFree(work);CHKERRQ(ierr); 2579 PetscFunctionReturn(0); 2580 } 2581 2582 #undef __FUNCT__ 2583 #define __FUNCT__ "MatSOR_MPISBAIJ" 2584 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2585 { 2586 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2587 PetscErrorCode ierr; 2588 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2589 PetscScalar *x,*ptr,*from; 2590 Vec bb1; 2591 const PetscScalar *b; 2592 2593 PetscFunctionBegin; 2594 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2595 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2596 2597 if (flag == SOR_APPLY_UPPER) { 2598 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2603 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2604 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2605 its--; 2606 } 2607 2608 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2609 while (its--){ 2610 2611 /* lower triangular part: slvec0b = - B^T*xx */ 2612 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2613 2614 /* copy xx into slvec0a */ 2615 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2616 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2617 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2618 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2619 2620 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2621 2622 /* copy bb into slvec1a */ 2623 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2624 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2625 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2626 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2627 2628 /* set slvec1b = 0 */ 2629 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2630 2631 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2632 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2633 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2634 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2635 2636 /* upper triangular part: bb1 = bb1 - B*x */ 2637 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2638 2639 /* local diagonal sweep */ 2640 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2641 } 2642 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2643 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2644 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2645 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2646 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2647 } else if (flag & SOR_EISENSTAT) { 2648 Vec xx1; 2649 PetscBool hasop; 2650 const PetscScalar *diag; 2651 PetscScalar *sl,scale = (omega - 2.0)/omega; 2652 PetscInt i,n; 2653 2654 if (!mat->xx1) { 2655 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2656 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2657 } 2658 xx1 = mat->xx1; 2659 bb1 = mat->bb1; 2660 2661 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2662 2663 if (!mat->diag) { 2664 /* this is wrong for same matrix with new nonzero values */ 2665 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2666 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2667 } 2668 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2669 2670 if (hasop) { 2671 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2672 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2673 } else { 2674 /* 2675 These two lines are replaced by code that may be a bit faster for a good compiler 2676 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2677 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2678 */ 2679 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2680 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2681 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2682 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2683 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2684 if (omega == 1.0) { 2685 for (i=0; i<n; i++) { 2686 sl[i] = b[i] - diag[i]*x[i]; 2687 } 2688 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2689 } else { 2690 for (i=0; i<n; i++) { 2691 sl[i] = b[i] + scale*diag[i]*x[i]; 2692 } 2693 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2694 } 2695 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2696 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2697 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2698 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2699 } 2700 2701 /* multiply off-diagonal portion of matrix */ 2702 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2703 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2704 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2705 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2706 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2707 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2708 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2709 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2710 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2711 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2712 2713 /* local sweep */ 2714 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2715 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2716 } else { 2717 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2718 } 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2724 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2725 { 2726 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2727 PetscErrorCode ierr; 2728 Vec lvec1,bb1; 2729 2730 PetscFunctionBegin; 2731 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2732 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2733 2734 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2735 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2736 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2737 its--; 2738 } 2739 2740 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2741 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2742 while (its--){ 2743 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2744 2745 /* lower diagonal part: bb1 = bb - B^T*xx */ 2746 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2747 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2748 2749 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2750 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2751 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2752 2753 /* upper diagonal part: bb1 = bb1 - B*x */ 2754 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2755 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2756 2757 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2758 2759 /* diagonal sweep */ 2760 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2761 } 2762 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2763 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2764 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2770 /*@ 2771 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2772 CSR format the local rows. 2773 2774 Collective on MPI_Comm 2775 2776 Input Parameters: 2777 + comm - MPI communicator 2778 . bs - the block size, only a block size of 1 is supported 2779 . m - number of local rows (Cannot be PETSC_DECIDE) 2780 . n - This value should be the same as the local size used in creating the 2781 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2782 calculated if N is given) For square matrices n is almost always m. 2783 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2784 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2785 . i - row indices 2786 . j - column indices 2787 - a - matrix values 2788 2789 Output Parameter: 2790 . mat - the matrix 2791 2792 Level: intermediate 2793 2794 Notes: 2795 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2796 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2797 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2798 2799 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2800 2801 .keywords: matrix, aij, compressed row, sparse, parallel 2802 2803 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2804 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2805 @*/ 2806 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2807 { 2808 PetscErrorCode ierr; 2809 2810 2811 PetscFunctionBegin; 2812 if (i[0]) { 2813 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2814 } 2815 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2816 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2818 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2819 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2820 PetscFunctionReturn(0); 2821 } 2822 2823 2824 #undef __FUNCT__ 2825 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2826 /*@C 2827 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2828 (the default parallel PETSc format). 2829 2830 Collective on MPI_Comm 2831 2832 Input Parameters: 2833 + A - the matrix 2834 . bs - the block size 2835 . i - the indices into j for the start of each local row (starts with zero) 2836 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2837 - v - optional values in the matrix 2838 2839 Level: developer 2840 2841 .keywords: matrix, aij, compressed row, sparse, parallel 2842 2843 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2844 @*/ 2845 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2846 { 2847 PetscErrorCode ierr; 2848 2849 PetscFunctionBegin; 2850 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 2855