1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 #if defined(PETSC_HAVE_ELEMENTAL) 23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 24 #endif 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 28 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 29 { 30 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 35 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 36 PetscFunctionReturn(0); 37 } 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 53 { \ 54 \ 55 brow = row/bs; \ 56 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 57 rmax = aimax[brow]; nrow = ailen[brow]; \ 58 bcol = col/bs; \ 59 ridx = row % bs; cidx = col % bs; \ 60 low = 0; high = nrow; \ 61 while (high-low > 3) { \ 62 t = (low+high)/2; \ 63 if (rp[t] > bcol) high = t; \ 64 else low = t; \ 65 } \ 66 for (_i=low; _i<high; _i++) { \ 67 if (rp[_i] > bcol) break; \ 68 if (rp[_i] == bcol) { \ 69 bap = ap + bs2*_i + bs*cidx + ridx; \ 70 if (addv == ADD_VALUES) *bap += value; \ 71 else *bap = value; \ 72 goto a_noinsert; \ 73 } \ 74 } \ 75 if (a->nonew == 1) goto a_noinsert; \ 76 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 77 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 78 N = nrow++ - 1; \ 79 /* shift up all the later entries in this row */ \ 80 for (ii=N; ii>=_i; ii--) { \ 81 rp[ii+1] = rp[ii]; \ 82 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 83 } \ 84 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 85 rp[_i] = bcol; \ 86 ap[bs2*_i + bs*cidx + ridx] = value; \ 87 A->nonzerostate++;\ 88 a_noinsert:; \ 89 ailen[brow] = nrow; \ 90 } 91 92 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 93 { \ 94 brow = row/bs; \ 95 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 96 rmax = bimax[brow]; nrow = bilen[brow]; \ 97 bcol = col/bs; \ 98 ridx = row % bs; cidx = col % bs; \ 99 low = 0; high = nrow; \ 100 while (high-low > 3) { \ 101 t = (low+high)/2; \ 102 if (rp[t] > bcol) high = t; \ 103 else low = t; \ 104 } \ 105 for (_i=low; _i<high; _i++) { \ 106 if (rp[_i] > bcol) break; \ 107 if (rp[_i] == bcol) { \ 108 bap = ap + bs2*_i + bs*cidx + ridx; \ 109 if (addv == ADD_VALUES) *bap += value; \ 110 else *bap = value; \ 111 goto b_noinsert; \ 112 } \ 113 } \ 114 if (b->nonew == 1) goto b_noinsert; \ 115 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 116 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 117 N = nrow++ - 1; \ 118 /* shift up all the later entries in this row */ \ 119 for (ii=N; ii>=_i; ii--) { \ 120 rp[ii+1] = rp[ii]; \ 121 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 } \ 123 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 124 rp[_i] = bcol; \ 125 ap[bs2*_i + bs*cidx + ridx] = value; \ 126 B->nonzerostate++;\ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa =a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba =b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = NULL; 164 MatScalar *v_loc = NULL; 165 166 PetscFunctionBegin; 167 if (!baij->donotstash) { 168 if (n > baij->n_loc) { 169 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 170 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 171 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 173 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 192 } 193 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 194 col = in[j] - cstart_orig; /* local col index */ 195 brow = row/bs; bcol = col/bs; 196 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 197 if (roworiented) value = v[i*n+j]; 198 else value = v[i+j*m]; 199 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 200 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 201 } else if (in[j] < 0) continue; 202 #if defined(PETSC_USE_DEBUG) 203 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 204 #endif 205 else { /* off-diag entry (B) */ 206 if (mat->was_assembled) { 207 if (!baij->colmap) { 208 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 209 } 210 #if defined(PETSC_USE_CTABLE) 211 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 212 col = col - 1; 213 #else 214 col = baij->colmap[in[j]/bs] - 1; 215 #endif 216 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 217 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 218 col = in[j]; 219 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 220 B = baij->B; 221 b = (Mat_SeqBAIJ*)(B)->data; 222 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 223 ba = b->a; 224 } else col += in[j]%bs; 225 } else col = in[j]; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 mat->assembled = PETSC_FALSE; 236 n_loc = 0; 237 for (j=0; j<n; j++) { 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray =baij->barray; 261 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if (!barray) { 269 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (im[i] > in[j]) { 287 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 288 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 289 } 290 /* If NumCol = 1 then a copy is not required */ 291 if ((roworiented) && (n == 1)) { 292 barray = (MatScalar*) v + i*bs2; 293 } else if ((!roworiented) && (m == 1)) { 294 barray = (MatScalar*) v + j*bs2; 295 } else { /* Here a copy is required */ 296 if (roworiented) { 297 value = v + i*(stepval+bs)*bs + j*bs; 298 } else { 299 value = v + j*(stepval+bs)*bs + i*bs; 300 } 301 for (ii=0; ii<bs; ii++,value+=stepval) { 302 for (jj=0; jj<bs; jj++) { 303 *barray++ = *value++; 304 } 305 } 306 barray -=bs2; 307 } 308 309 if (in[j] >= cstart && in[j] < cend) { 310 col = in[j] - cstart; 311 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 312 } else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined(PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined(PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend) { 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined(PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data--; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 399 } 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatNorm_MPISBAIJ" 405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 406 { 407 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscReal sum[2],*lnorm2; 410 411 PetscFunctionBegin; 412 if (baij->size == 1) { 413 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 414 } else { 415 if (type == NORM_FROBENIUS) { 416 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 417 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 418 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 419 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 421 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 422 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 423 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 424 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 425 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 426 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 427 PetscReal *rsum,*rsum2,vabs; 428 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 429 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 430 MatScalar *v; 431 432 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 433 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 434 /* Amat */ 435 v = amat->a; jj = amat->j; 436 for (brow=0; brow<mbs; brow++) { 437 grow = bs*(rstart + brow); 438 nz = amat->i[brow+1] - amat->i[brow]; 439 for (bcol=0; bcol<nz; bcol++) { 440 gcol = bs*(rstart + *jj); jj++; 441 for (col=0; col<bs; col++) { 442 for (row=0; row<bs; row++) { 443 vabs = PetscAbsScalar(*v); v++; 444 rsum[gcol+col] += vabs; 445 /* non-diagonal block */ 446 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 /* Bmat */ 452 v = bmat->a; jj = bmat->j; 453 for (brow=0; brow<mbs; brow++) { 454 grow = bs*(rstart + brow); 455 nz = bmat->i[brow+1] - bmat->i[brow]; 456 for (bcol=0; bcol<nz; bcol++) { 457 gcol = bs*garray[*jj]; jj++; 458 for (col=0; col<bs; col++) { 459 for (row=0; row<bs; row++) { 460 vabs = PetscAbsScalar(*v); v++; 461 rsum[gcol+col] += vabs; 462 rsum[grow+row] += vabs; 463 } 464 } 465 } 466 } 467 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 468 *norm = 0.0; 469 for (col=0; col<mat->cmap->N; col++) { 470 if (rsum2[col] > *norm) *norm = rsum2[col]; 471 } 472 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 473 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 483 PetscErrorCode ierr; 484 PetscInt nstash,reallocs; 485 InsertMode addv; 486 487 PetscFunctionBegin; 488 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 489 490 /* make sure all processors are either in INSERTMODE or ADDMODE */ 491 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 492 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 493 mat->insertmode = addv; /* in case this processor had no cache */ 494 495 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 496 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 497 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 498 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 499 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 500 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 507 { 508 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 509 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 510 PetscErrorCode ierr; 511 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 512 PetscInt *row,*col; 513 PetscBool other_disassembled; 514 PetscMPIInt n; 515 PetscBool r1,r2,r3; 516 MatScalar *val; 517 InsertMode addv = mat->insertmode; 518 519 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 520 PetscFunctionBegin; 521 if (!baij->donotstash && !mat->nooffprocentries) { 522 while (1) { 523 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 524 if (!flg) break; 525 526 for (i=0; i<n;) { 527 /* Now identify the consecutive vals belonging to the same row */ 528 for (j=i,rstart=row[j]; j<n; j++) { 529 if (row[j] != rstart) break; 530 } 531 if (j < n) ncols = j-i; 532 else ncols = n-i; 533 /* Now assemble all these values with a single function call */ 534 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 535 i = j; 536 } 537 } 538 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 539 /* Now process the block-stash. Since the values are stashed column-oriented, 540 set the roworiented flag to column oriented, and after MatSetValues() 541 restore the original flags */ 542 r1 = baij->roworiented; 543 r2 = a->roworiented; 544 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 545 546 baij->roworiented = PETSC_FALSE; 547 a->roworiented = PETSC_FALSE; 548 549 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 550 while (1) { 551 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 552 if (!flg) break; 553 554 for (i=0; i<n;) { 555 /* Now identify the consecutive vals belonging to the same row */ 556 for (j=i,rstart=row[j]; j<n; j++) { 557 if (row[j] != rstart) break; 558 } 559 if (j < n) ncols = j-i; 560 else ncols = n-i; 561 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 562 i = j; 563 } 564 } 565 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 566 567 baij->roworiented = r1; 568 a->roworiented = r2; 569 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 593 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 594 595 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 596 597 baij->rowvalues = 0; 598 599 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 600 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 601 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 602 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 609 #include <petscdraw.h> 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 613 { 614 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt bs = mat->rmap->bs; 617 PetscMPIInt rank = baij->rank; 618 PetscBool iascii,isdraw; 619 PetscViewer sviewer; 620 PetscViewerFormat format; 621 622 PetscFunctionBegin; 623 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 624 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 625 if (iascii) { 626 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 627 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 628 MatInfo info; 629 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 630 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 633 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 634 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 635 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 636 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 637 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 638 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 639 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 640 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } else if (format == PETSC_VIEWER_ASCII_INFO) { 643 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 646 PetscFunctionReturn(0); 647 } 648 } 649 650 if (isdraw) { 651 PetscDraw draw; 652 PetscBool isnull; 653 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 654 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 655 } 656 657 { 658 /* assemble the entire matrix onto first processor. */ 659 Mat A; 660 Mat_SeqSBAIJ *Aloc; 661 Mat_SeqBAIJ *Bloc; 662 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 663 MatScalar *a; 664 const char *matname; 665 666 /* Should this be the same type as mat? */ 667 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 668 if (!rank) { 669 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 670 } else { 671 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 672 } 673 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 674 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 675 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 676 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 677 678 /* copy over the A part */ 679 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 680 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 681 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 682 683 for (i=0; i<mbs; i++) { 684 rvals[0] = bs*(baij->rstartbs + i); 685 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 686 for (j=ai[i]; j<ai[i+1]; j++) { 687 col = (baij->cstartbs+aj[j])*bs; 688 for (k=0; k<bs; k++) { 689 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 690 col++; 691 a += bs; 692 } 693 } 694 } 695 /* copy over the B part */ 696 Bloc = (Mat_SeqBAIJ*)baij->B->data; 697 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 698 for (i=0; i<mbs; i++) { 699 700 rvals[0] = bs*(baij->rstartbs + i); 701 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 702 for (j=ai[i]; j<ai[i+1]; j++) { 703 col = baij->garray[aj[j]]*bs; 704 for (k=0; k<bs; k++) { 705 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 706 col++; 707 a += bs; 708 } 709 } 710 } 711 ierr = PetscFree(rvals);CHKERRQ(ierr); 712 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 713 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 714 /* 715 Everyone has to call to draw the matrix since the graphics waits are 716 synchronized across all processors that share the PetscDraw object 717 */ 718 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 719 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 720 if (!rank) { 721 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 722 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 723 } 724 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 725 ierr = MatDestroy(&A);CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatView_MPISBAIJ" 732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 733 { 734 PetscErrorCode ierr; 735 PetscBool iascii,isdraw,issocket,isbinary; 736 737 PetscFunctionBegin; 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 739 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 740 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 741 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 742 if (iascii || isdraw || issocket || isbinary) { 743 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatDestroy_MPISBAIJ" 750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 751 { 752 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 #if defined(PETSC_USE_LOG) 757 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 758 #endif 759 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 760 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 761 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 762 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 763 #if defined(PETSC_USE_CTABLE) 764 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 765 #else 766 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 767 #endif 768 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 770 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 771 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 772 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 773 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 774 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 776 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 777 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 778 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 779 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 780 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 781 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 782 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 783 #if defined(PETSC_USE_REAL_MAT_SINGLE) 784 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 785 #endif 786 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 788 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 789 ierr = PetscFree(mat->data);CHKERRQ(ierr); 790 791 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 796 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 797 #if defined(PETSC_HAVE_ELEMENTAL) 798 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 799 #endif 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 806 { 807 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 808 PetscErrorCode ierr; 809 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 810 PetscScalar *x,*from; 811 812 PetscFunctionBegin; 813 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 814 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 815 816 /* diagonal part */ 817 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 818 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 819 820 /* subdiagonal part */ 821 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 822 823 /* copy x into the vec slvec0 */ 824 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 827 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 828 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 829 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 830 831 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 /* supperdiagonal part */ 834 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMult_MPISBAIJ" 840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 841 { 842 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 843 PetscErrorCode ierr; 844 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 845 PetscScalar *x,*from; 846 847 PetscFunctionBegin; 848 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 849 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 850 851 /* diagonal part */ 852 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 853 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 854 855 /* subdiagonal part */ 856 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 857 858 /* copy x into the vec slvec0 */ 859 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 860 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 861 862 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 863 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 864 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 865 866 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 /* supperdiagonal part */ 869 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 875 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 876 { 877 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 878 PetscErrorCode ierr; 879 PetscInt nt; 880 881 PetscFunctionBegin; 882 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 883 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 884 885 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 886 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 887 888 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 /* do diagonal part */ 890 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 891 /* do supperdiagonal part */ 892 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 894 /* do subdiagonal part */ 895 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 896 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 897 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 903 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 904 { 905 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 906 PetscErrorCode ierr; 907 PetscInt mbs=a->mbs,bs=A->rmap->bs; 908 PetscScalar *x,*from,zero=0.0; 909 910 PetscFunctionBegin; 911 /* 912 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 913 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 914 */ 915 /* diagonal part */ 916 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 917 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 918 919 /* subdiagonal part */ 920 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 921 922 /* copy x into the vec slvec0 */ 923 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 924 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 925 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 926 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 927 928 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 930 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 932 /* supperdiagonal part */ 933 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNCT__ 938 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 939 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 940 { 941 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 942 PetscErrorCode ierr; 943 944 PetscFunctionBegin; 945 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 /* do diagonal part */ 947 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 948 /* do supperdiagonal part */ 949 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 951 952 /* do subdiagonal part */ 953 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 954 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 955 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } 958 959 /* 960 This only works correctly for square matrices where the subblock A->A is the 961 diagonal block 962 */ 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 965 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 966 { 967 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 968 PetscErrorCode ierr; 969 970 PetscFunctionBegin; 971 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 972 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatScale_MPISBAIJ" 978 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 979 { 980 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 981 PetscErrorCode ierr; 982 983 PetscFunctionBegin; 984 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 985 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatGetRow_MPISBAIJ" 991 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 992 { 993 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 994 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 995 PetscErrorCode ierr; 996 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 997 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 998 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 999 1000 PetscFunctionBegin; 1001 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1002 mat->getrowactive = PETSC_TRUE; 1003 1004 if (!mat->rowvalues && (idx || v)) { 1005 /* 1006 allocate enough space to hold information from the longest row. 1007 */ 1008 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1009 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1010 PetscInt max = 1,mbs = mat->mbs,tmp; 1011 for (i=0; i<mbs; i++) { 1012 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1013 if (max < tmp) max = tmp; 1014 } 1015 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1016 } 1017 1018 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1019 lrow = row - brstart; /* local row index */ 1020 1021 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1022 if (!v) {pvA = 0; pvB = 0;} 1023 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1024 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1025 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1026 nztot = nzA + nzB; 1027 1028 cmap = mat->garray; 1029 if (v || idx) { 1030 if (nztot) { 1031 /* Sort by increasing column numbers, assuming A and B already sorted */ 1032 PetscInt imark = -1; 1033 if (v) { 1034 *v = v_p = mat->rowvalues; 1035 for (i=0; i<nzB; i++) { 1036 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1037 else break; 1038 } 1039 imark = i; 1040 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1041 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1042 } 1043 if (idx) { 1044 *idx = idx_p = mat->rowindices; 1045 if (imark > -1) { 1046 for (i=0; i<imark; i++) { 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1048 } 1049 } else { 1050 for (i=0; i<nzB; i++) { 1051 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1052 else break; 1053 } 1054 imark = i; 1055 } 1056 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1057 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1058 } 1059 } else { 1060 if (idx) *idx = 0; 1061 if (v) *v = 0; 1062 } 1063 } 1064 *nz = nztot; 1065 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1066 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1072 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1073 { 1074 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1075 1076 PetscFunctionBegin; 1077 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1078 baij->getrowactive = PETSC_FALSE; 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1084 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1085 { 1086 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1087 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1088 1089 PetscFunctionBegin; 1090 aA->getrow_utriangular = PETSC_TRUE; 1091 PetscFunctionReturn(0); 1092 } 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1095 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1096 { 1097 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1098 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1099 1100 PetscFunctionBegin; 1101 aA->getrow_utriangular = PETSC_FALSE; 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1107 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1108 { 1109 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1114 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1120 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1121 { 1122 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1127 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1133 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1134 { 1135 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1140 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1146 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1147 { 1148 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1149 Mat A = a->A,B = a->B; 1150 PetscErrorCode ierr; 1151 PetscReal isend[5],irecv[5]; 1152 1153 PetscFunctionBegin; 1154 info->block_size = (PetscReal)matin->rmap->bs; 1155 1156 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1157 1158 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1159 isend[3] = info->memory; isend[4] = info->mallocs; 1160 1161 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1162 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1173 1174 info->nz_used = irecv[0]; 1175 info->nz_allocated = irecv[1]; 1176 info->nz_unneeded = irecv[2]; 1177 info->memory = irecv[3]; 1178 info->mallocs = irecv[4]; 1179 } else if (flag == MAT_GLOBAL_SUM) { 1180 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1181 1182 info->nz_used = irecv[0]; 1183 info->nz_allocated = irecv[1]; 1184 info->nz_unneeded = irecv[2]; 1185 info->memory = irecv[3]; 1186 info->mallocs = irecv[4]; 1187 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1188 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1189 info->fill_ratio_needed = 0; 1190 info->factor_mallocs = 0; 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1196 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1197 { 1198 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1199 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 switch (op) { 1204 case MAT_NEW_NONZERO_LOCATIONS: 1205 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1206 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1207 case MAT_KEEP_NONZERO_PATTERN: 1208 case MAT_NEW_NONZERO_LOCATION_ERR: 1209 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1210 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1211 break; 1212 case MAT_ROW_ORIENTED: 1213 a->roworiented = flg; 1214 1215 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1216 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1217 break; 1218 case MAT_NEW_DIAGONALS: 1219 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1220 break; 1221 case MAT_IGNORE_OFF_PROC_ENTRIES: 1222 a->donotstash = flg; 1223 break; 1224 case MAT_USE_HASH_TABLE: 1225 a->ht_flag = flg; 1226 break; 1227 case MAT_HERMITIAN: 1228 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1229 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1230 1231 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1232 break; 1233 case MAT_SPD: 1234 A->spd_set = PETSC_TRUE; 1235 A->spd = flg; 1236 if (flg) { 1237 A->symmetric = PETSC_TRUE; 1238 A->structurally_symmetric = PETSC_TRUE; 1239 A->symmetric_set = PETSC_TRUE; 1240 A->structurally_symmetric_set = PETSC_TRUE; 1241 } 1242 break; 1243 case MAT_SYMMETRIC: 1244 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1245 break; 1246 case MAT_STRUCTURALLY_SYMMETRIC: 1247 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1248 break; 1249 case MAT_SYMMETRY_ETERNAL: 1250 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1251 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1252 break; 1253 case MAT_IGNORE_LOWER_TRIANGULAR: 1254 aA->ignore_ltriangular = flg; 1255 break; 1256 case MAT_ERROR_LOWER_TRIANGULAR: 1257 aA->ignore_ltriangular = flg; 1258 break; 1259 case MAT_GETROW_UPPERTRIANGULAR: 1260 aA->getrow_utriangular = flg; 1261 break; 1262 default: 1263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1264 } 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1270 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1271 { 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 if (MAT_INITIAL_MATRIX || *B != A) { 1276 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1277 } 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1283 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1284 { 1285 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1286 Mat a = baij->A, b=baij->B; 1287 PetscErrorCode ierr; 1288 PetscInt nv,m,n; 1289 PetscBool flg; 1290 1291 PetscFunctionBegin; 1292 if (ll != rr) { 1293 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1294 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1295 } 1296 if (!ll) PetscFunctionReturn(0); 1297 1298 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1299 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1300 1301 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1302 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1303 1304 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 1306 /* left diagonalscale the off-diagonal part */ 1307 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1308 1309 /* scale the diagonal part */ 1310 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1311 1312 /* right diagonalscale the off-diagonal part */ 1313 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1314 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1320 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1321 { 1322 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatEqual_MPISBAIJ" 1334 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1335 { 1336 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1337 Mat a,b,c,d; 1338 PetscBool flg; 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 a = matA->A; b = matA->B; 1343 c = matB->A; d = matB->B; 1344 1345 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1346 if (flg) { 1347 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1348 } 1349 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatCopy_MPISBAIJ" 1355 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1356 { 1357 PetscErrorCode ierr; 1358 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1359 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1360 1361 PetscFunctionBegin; 1362 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1363 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1364 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1365 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1366 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1367 } else { 1368 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1369 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1376 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1377 { 1378 PetscErrorCode ierr; 1379 1380 PetscFunctionBegin; 1381 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1387 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1388 { 1389 PetscErrorCode ierr; 1390 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1391 PetscBLASInt bnz,one=1; 1392 Mat_SeqSBAIJ *xa,*ya; 1393 Mat_SeqBAIJ *xb,*yb; 1394 1395 PetscFunctionBegin; 1396 if (str == SAME_NONZERO_PATTERN) { 1397 PetscScalar alpha = a; 1398 xa = (Mat_SeqSBAIJ*)xx->A->data; 1399 ya = (Mat_SeqSBAIJ*)yy->A->data; 1400 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1401 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1402 xb = (Mat_SeqBAIJ*)xx->B->data; 1403 yb = (Mat_SeqBAIJ*)yy->B->data; 1404 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1405 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1406 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1407 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1408 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1409 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1410 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1411 } else { 1412 Mat B; 1413 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1414 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1415 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1416 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1417 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1418 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1419 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1420 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1421 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1422 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1423 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1424 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1425 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1426 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1427 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1428 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1429 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1430 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1431 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1432 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1433 } 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1439 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1440 { 1441 PetscErrorCode ierr; 1442 PetscInt i; 1443 PetscBool flg; 1444 1445 PetscFunctionBegin; 1446 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1447 for (i=0; i<n; i++) { 1448 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1449 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1450 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1451 } 1452 } 1453 PetscFunctionReturn(0); 1454 } 1455 1456 /* -------------------------------------------------------------------*/ 1457 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1458 MatGetRow_MPISBAIJ, 1459 MatRestoreRow_MPISBAIJ, 1460 MatMult_MPISBAIJ, 1461 /* 4*/ MatMultAdd_MPISBAIJ, 1462 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1463 MatMultAdd_MPISBAIJ, 1464 0, 1465 0, 1466 0, 1467 /* 10*/ 0, 1468 0, 1469 0, 1470 MatSOR_MPISBAIJ, 1471 MatTranspose_MPISBAIJ, 1472 /* 15*/ MatGetInfo_MPISBAIJ, 1473 MatEqual_MPISBAIJ, 1474 MatGetDiagonal_MPISBAIJ, 1475 MatDiagonalScale_MPISBAIJ, 1476 MatNorm_MPISBAIJ, 1477 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1478 MatAssemblyEnd_MPISBAIJ, 1479 MatSetOption_MPISBAIJ, 1480 MatZeroEntries_MPISBAIJ, 1481 /* 24*/ 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 /* 29*/ MatSetUp_MPISBAIJ, 1487 0, 1488 0, 1489 0, 1490 0, 1491 /* 34*/ MatDuplicate_MPISBAIJ, 1492 0, 1493 0, 1494 0, 1495 0, 1496 /* 39*/ MatAXPY_MPISBAIJ, 1497 MatGetSubMatrices_MPISBAIJ, 1498 MatIncreaseOverlap_MPISBAIJ, 1499 MatGetValues_MPISBAIJ, 1500 MatCopy_MPISBAIJ, 1501 /* 44*/ 0, 1502 MatScale_MPISBAIJ, 1503 0, 1504 0, 1505 0, 1506 /* 49*/ 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 /* 54*/ 0, 1512 0, 1513 MatSetUnfactored_MPISBAIJ, 1514 0, 1515 MatSetValuesBlocked_MPISBAIJ, 1516 /* 59*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /* 64*/ 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1527 0, 1528 0, 1529 0, 1530 0, 1531 /* 74*/ 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 /* 79*/ 0, 1537 0, 1538 0, 1539 0, 1540 MatLoad_MPISBAIJ, 1541 /* 84*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /* 89*/ 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 /* 94*/ 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /* 99*/ 0, 1557 0, 1558 0, 1559 0, 1560 0, 1561 /*104*/ 0, 1562 MatRealPart_MPISBAIJ, 1563 MatImaginaryPart_MPISBAIJ, 1564 MatGetRowUpperTriangular_MPISBAIJ, 1565 MatRestoreRowUpperTriangular_MPISBAIJ, 1566 /*109*/ 0, 1567 0, 1568 0, 1569 0, 1570 0, 1571 /*114*/ 0, 1572 0, 1573 0, 1574 0, 1575 0, 1576 /*119*/ 0, 1577 0, 1578 0, 1579 0, 1580 0, 1581 /*124*/ 0, 1582 0, 1583 0, 1584 0, 1585 0, 1586 /*129*/ 0, 1587 0, 1588 0, 1589 0, 1590 0, 1591 /*134*/ 0, 1592 0, 1593 0, 1594 0, 1595 0, 1596 /*139*/ 0, 1597 0, 1598 0, 1599 0, 1600 0, 1601 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1602 }; 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1606 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1607 { 1608 PetscFunctionBegin; 1609 *a = ((Mat_MPISBAIJ*)A->data)->A; 1610 PetscFunctionReturn(0); 1611 } 1612 1613 #undef __FUNCT__ 1614 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1615 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1616 { 1617 Mat_MPISBAIJ *b; 1618 PetscErrorCode ierr; 1619 PetscInt i,mbs,Mbs; 1620 1621 PetscFunctionBegin; 1622 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1623 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1624 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1625 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1626 1627 b = (Mat_MPISBAIJ*)B->data; 1628 mbs = B->rmap->n/bs; 1629 Mbs = B->rmap->N/bs; 1630 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1631 1632 B->rmap->bs = bs; 1633 b->bs2 = bs*bs; 1634 b->mbs = mbs; 1635 b->Mbs = Mbs; 1636 b->nbs = B->cmap->n/bs; 1637 b->Nbs = B->cmap->N/bs; 1638 1639 for (i=0; i<=b->size; i++) { 1640 b->rangebs[i] = B->rmap->range[i]/bs; 1641 } 1642 b->rstartbs = B->rmap->rstart/bs; 1643 b->rendbs = B->rmap->rend/bs; 1644 1645 b->cstartbs = B->cmap->rstart/bs; 1646 b->cendbs = B->cmap->rend/bs; 1647 1648 if (!B->preallocated) { 1649 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1650 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1651 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1652 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1653 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1654 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1655 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1656 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1657 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1658 } 1659 1660 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1661 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1662 1663 B->preallocated = PETSC_TRUE; 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1669 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1670 { 1671 PetscInt m,rstart,cstart,cend; 1672 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1673 const PetscInt *JJ =0; 1674 PetscScalar *values=0; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1679 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1680 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1681 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1682 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1683 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1684 m = B->rmap->n/bs; 1685 rstart = B->rmap->rstart/bs; 1686 cstart = B->cmap->rstart/bs; 1687 cend = B->cmap->rend/bs; 1688 1689 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1690 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1691 for (i=0; i<m; i++) { 1692 nz = ii[i+1] - ii[i]; 1693 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1694 nz_max = PetscMax(nz_max,nz); 1695 JJ = jj + ii[i]; 1696 for (j=0; j<nz; j++) { 1697 if (*JJ >= cstart) break; 1698 JJ++; 1699 } 1700 d = 0; 1701 for (; j<nz; j++) { 1702 if (*JJ++ >= cend) break; 1703 d++; 1704 } 1705 d_nnz[i] = d; 1706 o_nnz[i] = nz - d; 1707 } 1708 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1709 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1710 1711 values = (PetscScalar*)V; 1712 if (!values) { 1713 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1714 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1715 } 1716 for (i=0; i<m; i++) { 1717 PetscInt row = i + rstart; 1718 PetscInt ncols = ii[i+1] - ii[i]; 1719 const PetscInt *icols = jj + ii[i]; 1720 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1721 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1722 } 1723 1724 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1725 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1726 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1727 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 /*MC 1732 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1733 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1734 the matrix is stored. 1735 1736 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1737 can call MatSetOption(Mat, MAT_HERMITIAN); 1738 1739 Options Database Keys: 1740 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1741 1742 Level: beginner 1743 1744 .seealso: MatCreateMPISBAIJ 1745 M*/ 1746 1747 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatCreate_MPISBAIJ" 1751 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1752 { 1753 Mat_MPISBAIJ *b; 1754 PetscErrorCode ierr; 1755 PetscBool flg = PETSC_FALSE; 1756 1757 PetscFunctionBegin; 1758 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1759 B->data = (void*)b; 1760 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1761 1762 B->ops->destroy = MatDestroy_MPISBAIJ; 1763 B->ops->view = MatView_MPISBAIJ; 1764 B->assembled = PETSC_FALSE; 1765 B->insertmode = NOT_SET_VALUES; 1766 1767 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1768 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1769 1770 /* build local table of row and column ownerships */ 1771 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1772 1773 /* build cache for off array entries formed */ 1774 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1775 1776 b->donotstash = PETSC_FALSE; 1777 b->colmap = NULL; 1778 b->garray = NULL; 1779 b->roworiented = PETSC_TRUE; 1780 1781 /* stuff used in block assembly */ 1782 b->barray = 0; 1783 1784 /* stuff used for matrix vector multiply */ 1785 b->lvec = 0; 1786 b->Mvctx = 0; 1787 b->slvec0 = 0; 1788 b->slvec0b = 0; 1789 b->slvec1 = 0; 1790 b->slvec1a = 0; 1791 b->slvec1b = 0; 1792 b->sMvctx = 0; 1793 1794 /* stuff for MatGetRow() */ 1795 b->rowindices = 0; 1796 b->rowvalues = 0; 1797 b->getrowactive = PETSC_FALSE; 1798 1799 /* hash table stuff */ 1800 b->ht = 0; 1801 b->hd = 0; 1802 b->ht_size = 0; 1803 b->ht_flag = PETSC_FALSE; 1804 b->ht_fact = 0; 1805 b->ht_total_ct = 0; 1806 b->ht_insert_ct = 0; 1807 1808 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1809 b->ijonly = PETSC_FALSE; 1810 1811 b->in_loc = 0; 1812 b->v_loc = 0; 1813 b->n_loc = 0; 1814 1815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1816 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1818 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1820 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1821 #if defined(PETSC_HAVE_ELEMENTAL) 1822 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1823 #endif 1824 1825 B->symmetric = PETSC_TRUE; 1826 B->structurally_symmetric = PETSC_TRUE; 1827 B->symmetric_set = PETSC_TRUE; 1828 B->structurally_symmetric_set = PETSC_TRUE; 1829 1830 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1831 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1832 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 1833 if (flg) { 1834 PetscReal fact = 1.39; 1835 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1836 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1837 if (fact <= 1.0) fact = 1.39; 1838 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1839 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1840 } 1841 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 /*MC 1846 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1847 1848 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1849 and MATMPISBAIJ otherwise. 1850 1851 Options Database Keys: 1852 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1853 1854 Level: beginner 1855 1856 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1857 M*/ 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1861 /*@C 1862 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1863 the user should preallocate the matrix storage by setting the parameters 1864 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1865 performance can be increased by more than a factor of 50. 1866 1867 Collective on Mat 1868 1869 Input Parameters: 1870 + B - the matrix 1871 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1872 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1873 . d_nz - number of block nonzeros per block row in diagonal portion of local 1874 submatrix (same for all local rows) 1875 . d_nnz - array containing the number of block nonzeros in the various block rows 1876 in the upper triangular and diagonal part of the in diagonal portion of the local 1877 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1878 for the diagonal entry and set a value even if it is zero. 1879 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1880 submatrix (same for all local rows). 1881 - o_nnz - array containing the number of nonzeros in the various block rows of the 1882 off-diagonal portion of the local submatrix that is right of the diagonal 1883 (possibly different for each block row) or NULL. 1884 1885 1886 Options Database Keys: 1887 . -mat_no_unroll - uses code that does not unroll the loops in the 1888 block calculations (much slower) 1889 . -mat_block_size - size of the blocks to use 1890 1891 Notes: 1892 1893 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1894 than it must be used on all processors that share the object for that argument. 1895 1896 If the *_nnz parameter is given then the *_nz parameter is ignored 1897 1898 Storage Information: 1899 For a square global matrix we define each processor's diagonal portion 1900 to be its local rows and the corresponding columns (a square submatrix); 1901 each processor's off-diagonal portion encompasses the remainder of the 1902 local matrix (a rectangular submatrix). 1903 1904 The user can specify preallocated storage for the diagonal part of 1905 the local submatrix with either d_nz or d_nnz (not both). Set 1906 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1907 memory allocation. Likewise, specify preallocated storage for the 1908 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1909 1910 You can call MatGetInfo() to get information on how effective the preallocation was; 1911 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1912 You can also run with the option -info and look for messages with the string 1913 malloc in them to see if additional memory allocation was needed. 1914 1915 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1916 the figure below we depict these three local rows and all columns (0-11). 1917 1918 .vb 1919 0 1 2 3 4 5 6 7 8 9 10 11 1920 -------------------------- 1921 row 3 |. . . d d d o o o o o o 1922 row 4 |. . . d d d o o o o o o 1923 row 5 |. . . d d d o o o o o o 1924 -------------------------- 1925 .ve 1926 1927 Thus, any entries in the d locations are stored in the d (diagonal) 1928 submatrix, and any entries in the o locations are stored in the 1929 o (off-diagonal) submatrix. Note that the d matrix is stored in 1930 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1931 1932 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1933 plus the diagonal part of the d matrix, 1934 and o_nz should indicate the number of block nonzeros per row in the o matrix 1935 1936 In general, for PDE problems in which most nonzeros are near the diagonal, 1937 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1938 or you will get TERRIBLE performance; see the users' manual chapter on 1939 matrices. 1940 1941 Level: intermediate 1942 1943 .keywords: matrix, block, aij, compressed row, sparse, parallel 1944 1945 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 1946 @*/ 1947 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1948 { 1949 PetscErrorCode ierr; 1950 1951 PetscFunctionBegin; 1952 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1953 PetscValidType(B,1); 1954 PetscValidLogicalCollectiveInt(B,bs,2); 1955 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatCreateSBAIJ" 1961 /*@C 1962 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1963 (block compressed row). For good matrix assembly performance 1964 the user should preallocate the matrix storage by setting the parameters 1965 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1966 performance can be increased by more than a factor of 50. 1967 1968 Collective on MPI_Comm 1969 1970 Input Parameters: 1971 + comm - MPI communicator 1972 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1973 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1974 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1975 This value should be the same as the local size used in creating the 1976 y vector for the matrix-vector product y = Ax. 1977 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1978 This value should be the same as the local size used in creating the 1979 x vector for the matrix-vector product y = Ax. 1980 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1981 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1982 . d_nz - number of block nonzeros per block row in diagonal portion of local 1983 submatrix (same for all local rows) 1984 . d_nnz - array containing the number of block nonzeros in the various block rows 1985 in the upper triangular portion of the in diagonal portion of the local 1986 (possibly different for each block block row) or NULL. 1987 If you plan to factor the matrix you must leave room for the diagonal entry and 1988 set its value even if it is zero. 1989 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1990 submatrix (same for all local rows). 1991 - o_nnz - array containing the number of nonzeros in the various block rows of the 1992 off-diagonal portion of the local submatrix (possibly different for 1993 each block row) or NULL. 1994 1995 Output Parameter: 1996 . A - the matrix 1997 1998 Options Database Keys: 1999 . -mat_no_unroll - uses code that does not unroll the loops in the 2000 block calculations (much slower) 2001 . -mat_block_size - size of the blocks to use 2002 . -mat_mpi - use the parallel matrix data structures even on one processor 2003 (defaults to using SeqBAIJ format on one processor) 2004 2005 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2006 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2007 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2008 2009 Notes: 2010 The number of rows and columns must be divisible by blocksize. 2011 This matrix type does not support complex Hermitian operation. 2012 2013 The user MUST specify either the local or global matrix dimensions 2014 (possibly both). 2015 2016 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2017 than it must be used on all processors that share the object for that argument. 2018 2019 If the *_nnz parameter is given then the *_nz parameter is ignored 2020 2021 Storage Information: 2022 For a square global matrix we define each processor's diagonal portion 2023 to be its local rows and the corresponding columns (a square submatrix); 2024 each processor's off-diagonal portion encompasses the remainder of the 2025 local matrix (a rectangular submatrix). 2026 2027 The user can specify preallocated storage for the diagonal part of 2028 the local submatrix with either d_nz or d_nnz (not both). Set 2029 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2030 memory allocation. Likewise, specify preallocated storage for the 2031 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2032 2033 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2034 the figure below we depict these three local rows and all columns (0-11). 2035 2036 .vb 2037 0 1 2 3 4 5 6 7 8 9 10 11 2038 -------------------------- 2039 row 3 |. . . d d d o o o o o o 2040 row 4 |. . . d d d o o o o o o 2041 row 5 |. . . d d d o o o o o o 2042 -------------------------- 2043 .ve 2044 2045 Thus, any entries in the d locations are stored in the d (diagonal) 2046 submatrix, and any entries in the o locations are stored in the 2047 o (off-diagonal) submatrix. Note that the d matrix is stored in 2048 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2049 2050 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2051 plus the diagonal part of the d matrix, 2052 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2053 In general, for PDE problems in which most nonzeros are near the diagonal, 2054 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2055 or you will get TERRIBLE performance; see the users' manual chapter on 2056 matrices. 2057 2058 Level: intermediate 2059 2060 .keywords: matrix, block, aij, compressed row, sparse, parallel 2061 2062 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2063 @*/ 2064 2065 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2066 { 2067 PetscErrorCode ierr; 2068 PetscMPIInt size; 2069 2070 PetscFunctionBegin; 2071 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2072 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2073 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2074 if (size > 1) { 2075 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2076 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2077 } else { 2078 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2079 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2087 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2088 { 2089 Mat mat; 2090 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2091 PetscErrorCode ierr; 2092 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2093 PetscScalar *array; 2094 2095 PetscFunctionBegin; 2096 *newmat = 0; 2097 2098 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2099 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2100 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2101 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2102 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2103 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2104 2105 mat->factortype = matin->factortype; 2106 mat->preallocated = PETSC_TRUE; 2107 mat->assembled = PETSC_TRUE; 2108 mat->insertmode = NOT_SET_VALUES; 2109 2110 a = (Mat_MPISBAIJ*)mat->data; 2111 a->bs2 = oldmat->bs2; 2112 a->mbs = oldmat->mbs; 2113 a->nbs = oldmat->nbs; 2114 a->Mbs = oldmat->Mbs; 2115 a->Nbs = oldmat->Nbs; 2116 2117 2118 a->size = oldmat->size; 2119 a->rank = oldmat->rank; 2120 a->donotstash = oldmat->donotstash; 2121 a->roworiented = oldmat->roworiented; 2122 a->rowindices = 0; 2123 a->rowvalues = 0; 2124 a->getrowactive = PETSC_FALSE; 2125 a->barray = 0; 2126 a->rstartbs = oldmat->rstartbs; 2127 a->rendbs = oldmat->rendbs; 2128 a->cstartbs = oldmat->cstartbs; 2129 a->cendbs = oldmat->cendbs; 2130 2131 /* hash table stuff */ 2132 a->ht = 0; 2133 a->hd = 0; 2134 a->ht_size = 0; 2135 a->ht_flag = oldmat->ht_flag; 2136 a->ht_fact = oldmat->ht_fact; 2137 a->ht_total_ct = 0; 2138 a->ht_insert_ct = 0; 2139 2140 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2141 if (oldmat->colmap) { 2142 #if defined(PETSC_USE_CTABLE) 2143 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2144 #else 2145 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2146 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2147 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2148 #endif 2149 } else a->colmap = 0; 2150 2151 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2152 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2153 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2154 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2155 } else a->garray = 0; 2156 2157 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2158 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2159 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2160 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2161 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2162 2163 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2164 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2165 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2166 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2167 2168 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2169 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2170 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2171 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2172 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2173 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2174 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2175 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2176 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2177 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2178 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2179 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2180 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2181 2182 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2183 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2184 a->sMvctx = oldmat->sMvctx; 2185 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2186 2187 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2188 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2189 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2190 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2191 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2192 *newmat = mat; 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatLoad_MPISBAIJ" 2198 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2199 { 2200 PetscErrorCode ierr; 2201 PetscInt i,nz,j,rstart,rend; 2202 PetscScalar *vals,*buf; 2203 MPI_Comm comm; 2204 MPI_Status status; 2205 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2206 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2207 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2208 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2209 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2210 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2211 int fd; 2212 2213 PetscFunctionBegin; 2214 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2215 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2216 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2217 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2218 if (bs < 0) bs = 1; 2219 2220 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2221 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2222 if (!rank) { 2223 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2224 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2225 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2226 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2227 } 2228 2229 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2230 2231 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2232 M = header[1]; 2233 N = header[2]; 2234 2235 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2236 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2237 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2238 2239 /* If global sizes are set, check if they are consistent with that given in the file */ 2240 if (sizesset) { 2241 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2242 } 2243 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2244 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2245 2246 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2247 2248 /* 2249 This code adds extra rows to make sure the number of rows is 2250 divisible by the blocksize 2251 */ 2252 Mbs = M/bs; 2253 extra_rows = bs - M + bs*(Mbs); 2254 if (extra_rows == bs) extra_rows = 0; 2255 else Mbs++; 2256 if (extra_rows &&!rank) { 2257 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2258 } 2259 2260 /* determine ownership of all rows */ 2261 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2262 mbs = Mbs/size + ((Mbs % size) > rank); 2263 m = mbs*bs; 2264 } else { /* User Set */ 2265 m = newmat->rmap->n; 2266 mbs = m/bs; 2267 } 2268 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2269 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2270 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2271 rowners[0] = 0; 2272 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2273 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2274 rstart = rowners[rank]; 2275 rend = rowners[rank+1]; 2276 2277 /* distribute row lengths to all processors */ 2278 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2279 if (!rank) { 2280 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2281 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2282 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2283 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2284 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2285 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2286 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2287 } else { 2288 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2289 } 2290 2291 if (!rank) { /* procs[0] */ 2292 /* calculate the number of nonzeros on each processor */ 2293 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2294 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2295 for (i=0; i<size; i++) { 2296 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2297 procsnz[i] += rowlengths[j]; 2298 } 2299 } 2300 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2301 2302 /* determine max buffer needed and allocate it */ 2303 maxnz = 0; 2304 for (i=0; i<size; i++) { 2305 maxnz = PetscMax(maxnz,procsnz[i]); 2306 } 2307 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2308 2309 /* read in my part of the matrix column indices */ 2310 nz = procsnz[0]; 2311 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2312 mycols = ibuf; 2313 if (size == 1) nz -= extra_rows; 2314 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2315 if (size == 1) { 2316 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2317 } 2318 2319 /* read in every ones (except the last) and ship off */ 2320 for (i=1; i<size-1; i++) { 2321 nz = procsnz[i]; 2322 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2323 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2324 } 2325 /* read in the stuff for the last proc */ 2326 if (size != 1) { 2327 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2328 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2329 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2330 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2331 } 2332 ierr = PetscFree(cols);CHKERRQ(ierr); 2333 } else { /* procs[i], i>0 */ 2334 /* determine buffer space needed for message */ 2335 nz = 0; 2336 for (i=0; i<m; i++) nz += locrowlens[i]; 2337 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2338 mycols = ibuf; 2339 /* receive message of column indices*/ 2340 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2341 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2342 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2343 } 2344 2345 /* loop over local rows, determining number of off diagonal entries */ 2346 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2347 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2348 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2349 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2350 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2351 rowcount = 0; 2352 nzcount = 0; 2353 for (i=0; i<mbs; i++) { 2354 dcount = 0; 2355 odcount = 0; 2356 for (j=0; j<bs; j++) { 2357 kmax = locrowlens[rowcount]; 2358 for (k=0; k<kmax; k++) { 2359 tmp = mycols[nzcount++]/bs; /* block col. index */ 2360 if (!mask[tmp]) { 2361 mask[tmp] = 1; 2362 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2363 else masked1[dcount++] = tmp; /* entry in diag portion */ 2364 } 2365 } 2366 rowcount++; 2367 } 2368 2369 dlens[i] = dcount; /* d_nzz[i] */ 2370 odlens[i] = odcount; /* o_nzz[i] */ 2371 2372 /* zero out the mask elements we set */ 2373 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2374 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2375 } 2376 if (!sizesset) { 2377 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2378 } 2379 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2380 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2381 2382 if (!rank) { 2383 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2384 /* read in my part of the matrix numerical values */ 2385 nz = procsnz[0]; 2386 vals = buf; 2387 mycols = ibuf; 2388 if (size == 1) nz -= extra_rows; 2389 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2390 if (size == 1) { 2391 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2392 } 2393 2394 /* insert into matrix */ 2395 jj = rstart*bs; 2396 for (i=0; i<m; i++) { 2397 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2398 mycols += locrowlens[i]; 2399 vals += locrowlens[i]; 2400 jj++; 2401 } 2402 2403 /* read in other processors (except the last one) and ship out */ 2404 for (i=1; i<size-1; i++) { 2405 nz = procsnz[i]; 2406 vals = buf; 2407 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2408 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2409 } 2410 /* the last proc */ 2411 if (size != 1) { 2412 nz = procsnz[i] - extra_rows; 2413 vals = buf; 2414 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2415 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2416 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2417 } 2418 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2419 2420 } else { 2421 /* receive numeric values */ 2422 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2423 2424 /* receive message of values*/ 2425 vals = buf; 2426 mycols = ibuf; 2427 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2428 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2429 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2430 2431 /* insert into matrix */ 2432 jj = rstart*bs; 2433 for (i=0; i<m; i++) { 2434 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2435 mycols += locrowlens[i]; 2436 vals += locrowlens[i]; 2437 jj++; 2438 } 2439 } 2440 2441 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2442 ierr = PetscFree(buf);CHKERRQ(ierr); 2443 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2444 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2445 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2446 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2454 /*XXXXX@ 2455 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2456 2457 Input Parameters: 2458 . mat - the matrix 2459 . fact - factor 2460 2461 Not Collective on Mat, each process can have a different hash factor 2462 2463 Level: advanced 2464 2465 Notes: 2466 This can also be set by the command line option: -mat_use_hash_table fact 2467 2468 .keywords: matrix, hashtable, factor, HT 2469 2470 .seealso: MatSetOption() 2471 @XXXXX*/ 2472 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2476 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2477 { 2478 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2479 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2480 PetscReal atmp; 2481 PetscReal *work,*svalues,*rvalues; 2482 PetscErrorCode ierr; 2483 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2484 PetscMPIInt rank,size; 2485 PetscInt *rowners_bs,dest,count,source; 2486 PetscScalar *va; 2487 MatScalar *ba; 2488 MPI_Status stat; 2489 2490 PetscFunctionBegin; 2491 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2492 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2493 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2494 2495 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2496 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2497 2498 bs = A->rmap->bs; 2499 mbs = a->mbs; 2500 Mbs = a->Mbs; 2501 ba = b->a; 2502 bi = b->i; 2503 bj = b->j; 2504 2505 /* find ownerships */ 2506 rowners_bs = A->rmap->range; 2507 2508 /* each proc creates an array to be distributed */ 2509 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2510 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2511 2512 /* row_max for B */ 2513 if (rank != size-1) { 2514 for (i=0; i<mbs; i++) { 2515 ncols = bi[1] - bi[0]; bi++; 2516 brow = bs*i; 2517 for (j=0; j<ncols; j++) { 2518 bcol = bs*(*bj); 2519 for (kcol=0; kcol<bs; kcol++) { 2520 col = bcol + kcol; /* local col index */ 2521 col += rowners_bs[rank+1]; /* global col index */ 2522 for (krow=0; krow<bs; krow++) { 2523 atmp = PetscAbsScalar(*ba); ba++; 2524 row = brow + krow; /* local row index */ 2525 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2526 if (work[col] < atmp) work[col] = atmp; 2527 } 2528 } 2529 bj++; 2530 } 2531 } 2532 2533 /* send values to its owners */ 2534 for (dest=rank+1; dest<size; dest++) { 2535 svalues = work + rowners_bs[dest]; 2536 count = rowners_bs[dest+1]-rowners_bs[dest]; 2537 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2538 } 2539 } 2540 2541 /* receive values */ 2542 if (rank) { 2543 rvalues = work; 2544 count = rowners_bs[rank+1]-rowners_bs[rank]; 2545 for (source=0; source<rank; source++) { 2546 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2547 /* process values */ 2548 for (i=0; i<count; i++) { 2549 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2550 } 2551 } 2552 } 2553 2554 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2555 ierr = PetscFree(work);CHKERRQ(ierr); 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatSOR_MPISBAIJ" 2561 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2562 { 2563 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2564 PetscErrorCode ierr; 2565 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2566 PetscScalar *x,*ptr,*from; 2567 Vec bb1; 2568 const PetscScalar *b; 2569 2570 PetscFunctionBegin; 2571 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2572 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2573 2574 if (flag == SOR_APPLY_UPPER) { 2575 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2580 if (flag & SOR_ZERO_INITIAL_GUESS) { 2581 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2582 its--; 2583 } 2584 2585 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2586 while (its--) { 2587 2588 /* lower triangular part: slvec0b = - B^T*xx */ 2589 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2590 2591 /* copy xx into slvec0a */ 2592 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2593 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2594 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2595 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2596 2597 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2598 2599 /* copy bb into slvec1a */ 2600 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2601 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2602 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2603 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2604 2605 /* set slvec1b = 0 */ 2606 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2607 2608 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2609 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2610 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2611 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2612 2613 /* upper triangular part: bb1 = bb1 - B*x */ 2614 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2615 2616 /* local diagonal sweep */ 2617 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2618 } 2619 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2620 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2621 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2622 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2624 } else if (flag & SOR_EISENSTAT) { 2625 Vec xx1; 2626 PetscBool hasop; 2627 const PetscScalar *diag; 2628 PetscScalar *sl,scale = (omega - 2.0)/omega; 2629 PetscInt i,n; 2630 2631 if (!mat->xx1) { 2632 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2633 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2634 } 2635 xx1 = mat->xx1; 2636 bb1 = mat->bb1; 2637 2638 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2639 2640 if (!mat->diag) { 2641 /* this is wrong for same matrix with new nonzero values */ 2642 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2643 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2644 } 2645 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2646 2647 if (hasop) { 2648 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2649 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2650 } else { 2651 /* 2652 These two lines are replaced by code that may be a bit faster for a good compiler 2653 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2654 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2655 */ 2656 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2657 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2658 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2659 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2660 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2661 if (omega == 1.0) { 2662 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2663 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2664 } else { 2665 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2666 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2667 } 2668 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2669 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2670 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2672 } 2673 2674 /* multiply off-diagonal portion of matrix */ 2675 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2676 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2677 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2678 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2679 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2680 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2682 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2683 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2684 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2685 2686 /* local sweep */ 2687 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2688 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2689 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 #undef __FUNCT__ 2694 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2695 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2696 { 2697 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2698 PetscErrorCode ierr; 2699 Vec lvec1,bb1; 2700 2701 PetscFunctionBegin; 2702 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2703 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2704 2705 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2706 if (flag & SOR_ZERO_INITIAL_GUESS) { 2707 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2708 its--; 2709 } 2710 2711 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2712 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2713 while (its--) { 2714 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2715 2716 /* lower diagonal part: bb1 = bb - B^T*xx */ 2717 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2718 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2719 2720 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2721 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2722 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2723 2724 /* upper diagonal part: bb1 = bb1 - B*x */ 2725 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2726 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2727 2728 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2729 2730 /* diagonal sweep */ 2731 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2732 } 2733 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2734 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2735 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2741 /*@ 2742 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2743 CSR format the local rows. 2744 2745 Collective on MPI_Comm 2746 2747 Input Parameters: 2748 + comm - MPI communicator 2749 . bs - the block size, only a block size of 1 is supported 2750 . m - number of local rows (Cannot be PETSC_DECIDE) 2751 . n - This value should be the same as the local size used in creating the 2752 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2753 calculated if N is given) For square matrices n is almost always m. 2754 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2755 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2756 . i - row indices 2757 . j - column indices 2758 - a - matrix values 2759 2760 Output Parameter: 2761 . mat - the matrix 2762 2763 Level: intermediate 2764 2765 Notes: 2766 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2767 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2768 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2769 2770 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2771 2772 .keywords: matrix, aij, compressed row, sparse, parallel 2773 2774 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2775 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2776 @*/ 2777 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2778 { 2779 PetscErrorCode ierr; 2780 2781 2782 PetscFunctionBegin; 2783 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2784 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2785 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2786 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2787 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2788 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2789 PetscFunctionReturn(0); 2790 } 2791 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2795 /*@C 2796 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2797 (the default parallel PETSc format). 2798 2799 Collective on MPI_Comm 2800 2801 Input Parameters: 2802 + B - the matrix 2803 . bs - the block size 2804 . i - the indices into j for the start of each local row (starts with zero) 2805 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2806 - v - optional values in the matrix 2807 2808 Level: developer 2809 2810 .keywords: matrix, aij, compressed row, sparse, parallel 2811 2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2813 @*/ 2814 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2815 { 2816 PetscErrorCode ierr; 2817 2818 PetscFunctionBegin; 2819 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2820 PetscFunctionReturn(0); 2821 } 2822 2823 #undef __FUNCT__ 2824 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 2825 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2826 { 2827 PetscErrorCode ierr; 2828 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2829 PetscInt *indx; 2830 PetscScalar *values; 2831 2832 PetscFunctionBegin; 2833 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2834 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2835 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2836 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 2837 PetscInt *bindx,rmax=a->rmax,j; 2838 2839 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2840 mbs = m/bs; Nbs = N/cbs; 2841 if (n == PETSC_DECIDE) { 2842 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 2843 } 2844 /* Check sum(n) = Nbs */ 2845 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2846 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 2847 2848 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2849 rstart -= mbs; 2850 2851 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2852 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 2853 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2854 for (i=0; i<mbs; i++) { 2855 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2856 nnz = nnz/bs; 2857 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2858 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2859 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2860 } 2861 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2862 ierr = PetscFree(bindx);CHKERRQ(ierr); 2863 2864 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2865 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2866 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2867 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2868 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2869 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2870 } 2871 2872 /* numeric phase */ 2873 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2874 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2875 2876 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2877 for (i=0; i<m; i++) { 2878 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2879 Ii = i + rstart; 2880 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2881 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2882 } 2883 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2884 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 PetscFunctionReturn(0); 2887 } 2888