#include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
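/*
   Note on the two macros below: each inserts a single scalar into the local diagonal block A
   (SeqSBAIJ) or the local off-diagonal block B (SeqBAIJ).  The target block column is located
   with a short binary search followed by a linear scan; an existing entry is added to or
   overwritten according to addv, otherwise the later block columns of the row are shifted up
   (reallocating via MatSeqXAIJReallocateAIJ when necessary) and the value is written into a
   freshly zeroed bs x bs block.
*/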
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this
processor entry */ 184 row = im[i] - rstart_orig; /* local row index */ 185 for (j=0; j<n; j++) { 186 if (im[i]/bs > in[j]/bs) { 187 if (a->ignore_ltriangular) { 188 continue; /* ignore lower triangular blocks */ 189 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 190 } 191 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 192 col = in[j] - cstart_orig; /* local col index */ 193 brow = row/bs; bcol = col/bs; 194 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 195 if (roworiented) value = v[i*n+j]; 196 else value = v[i+j*m]; 197 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 198 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 199 } else if (in[j] < 0) continue; 200 #if defined(PETSC_USE_DEBUG) 201 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 202 #endif 203 else { /* off-diag entry (B) */ 204 if (mat->was_assembled) { 205 if (!baij->colmap) { 206 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 207 } 208 #if defined(PETSC_USE_CTABLE) 209 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 210 col = col - 1; 211 #else 212 col = baij->colmap[in[j]/bs] - 1; 213 #endif 214 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 215 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 216 col = in[j]; 217 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 218 B = baij->B; 219 b = (Mat_SeqBAIJ*)(B)->data; 220 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 221 ba = b->a; 222 } else col += in[j]%bs; 223 } else col = in[j]; 224 if (roworiented) value = v[i*n+j]; 225 else value = v[i+j*m]; 226 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } 229 } 230 } else { /* off processor entry */ 231 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 232 if (!baij->donotstash) { 233 mat->assembled = PETSC_FALSE; 234 n_loc = 0; 235 for (j=0; j<n; j++) { 236 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 237 in_loc[n_loc] = in[j]; 238 if (roworiented) { 239 v_loc[n_loc] = v[i*n+j]; 240 } else { 241 v_loc[n_loc] = v[j*m+i]; 242 } 243 n_loc++; 244 } 245 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 246 } 247 } 248 } 249 PetscFunctionReturn(0); 250 } 251 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 254 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 255 { 256 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 257 const MatScalar *value; 258 MatScalar *barray =baij->barray; 259 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 260 PetscErrorCode ierr; 261 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 262 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 263 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 264 265 PetscFunctionBegin; 266 if (!barray) { 267 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 
268 baij->barray = barray; 269 } 270 271 if (roworiented) { 272 stepval = (n-1)*bs; 273 } else { 274 stepval = (m-1)*bs; 275 } 276 for (i=0; i<m; i++) { 277 if (im[i] < 0) continue; 278 #if defined(PETSC_USE_DEBUG) 279 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 280 #endif 281 if (im[i] >= rstart && im[i] < rend) { 282 row = im[i] - rstart; 283 for (j=0; j<n; j++) { 284 if (im[i] > in[j]) { 285 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 286 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 287 } 288 /* If NumCol = 1 then a copy is not required */ 289 if ((roworiented) && (n == 1)) { 290 barray = (MatScalar*) v + i*bs2; 291 } else if ((!roworiented) && (m == 1)) { 292 barray = (MatScalar*) v + j*bs2; 293 } else { /* Here a copy is required */ 294 if (roworiented) { 295 value = v + i*(stepval+bs)*bs + j*bs; 296 } else { 297 value = v + j*(stepval+bs)*bs + i*bs; 298 } 299 for (ii=0; ii<bs; ii++,value+=stepval) { 300 for (jj=0; jj<bs; jj++) { 301 *barray++ = *value++; 302 } 303 } 304 barray -=bs2; 305 } 306 307 if (in[j] >= cstart && in[j] < cend) { 308 col = in[j] - cstart; 309 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 310 } else if (in[j] < 0) continue; 311 #if defined(PETSC_USE_DEBUG) 312 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 313 #endif 314 else { 315 if (mat->was_assembled) { 316 if (!baij->colmap) { 317 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 318 } 319 320 #if defined(PETSC_USE_DEBUG) 321 #if defined(PETSC_USE_CTABLE) 322 { PetscInt data; 323 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 324 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 325 } 326 #else 327 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 328 #endif 329 #endif 330 #if defined(PETSC_USE_CTABLE) 331 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 332 col = (col - 1)/bs; 333 #else 334 col = (baij->colmap[in[j]] - 1)/bs; 335 #endif 336 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 337 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 338 col = in[j]; 339 } 340 } else col = in[j]; 341 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 342 } 343 } 344 } else { 345 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 346 if (!baij->donotstash) { 347 if (roworiented) { 348 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 349 } else { 350 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } 352 } 353 } 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatGetValues_MPISBAIJ" 360 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 361 { 362 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 365 PetscInt bscstart 
= mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
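/*
   MatNorm below: the Frobenius norm is assembled from the squared norms of the local parts,
   with the off-diagonal block B counted twice (sum[0] + 2*sum[1]) because only the upper
   triangular half of the matrix is stored.  The 1- and infinity-norms accumulate absolute
   values into per-column sums and reduce them with MPI_Allreduce.
*/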
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
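/*
   Assembly: values destined for rows owned by other processes are kept in mat->stash (scalar
   entries) and mat->bstash (blocked entries) by MatSetValues()/MatSetValuesBlocked().
   MatAssemblyBegin starts the stash communication; MatAssemblyEnd receives the messages,
   re-applies them with MatSetValues(Blocked)_MPISBAIJ, and then assembles the local A and B parts.
*/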
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) { 625 MatInfo info; 626 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 627 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 630 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 632 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 633 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 634 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 635 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 636 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 637 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } else if (format == PETSC_VIEWER_ASCII_INFO) { 640 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 643 PetscFunctionReturn(0); 644 } 645 } 646 647 if (isdraw) { 648 PetscDraw draw; 649 PetscBool isnull; 650 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 651 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 652 } 653 654 if (size == 1) { 655 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 656 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 657 } else { 658 /* assemble the entire matrix onto first processor. */ 659 Mat A; 660 Mat_SeqSBAIJ *Aloc; 661 Mat_SeqBAIJ *Bloc; 662 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 663 MatScalar *a; 664 665 /* Should this be the same type as mat? 
*/ 666 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 667 if (!rank) { 668 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 669 } else { 670 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 671 } 672 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 673 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 674 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 675 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 676 677 /* copy over the A part */ 678 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 679 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 680 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 681 682 for (i=0; i<mbs; i++) { 683 rvals[0] = bs*(baij->rstartbs + i); 684 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 685 for (j=ai[i]; j<ai[i+1]; j++) { 686 col = (baij->cstartbs+aj[j])*bs; 687 for (k=0; k<bs; k++) { 688 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 689 col++; 690 a += bs; 691 } 692 } 693 } 694 /* copy over the B part */ 695 Bloc = (Mat_SeqBAIJ*)baij->B->data; 696 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 697 for (i=0; i<mbs; i++) { 698 699 rvals[0] = bs*(baij->rstartbs + i); 700 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 701 for (j=ai[i]; j<ai[i+1]; j++) { 702 col = baij->garray[aj[j]]*bs; 703 for (k=0; k<bs; k++) { 704 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 705 col++; 706 a += bs; 707 } 708 } 709 } 710 ierr = PetscFree(rvals);CHKERRQ(ierr); 711 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 712 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 713 /* 714 Everyone has to call to draw the matrix since the graphics waits are 715 synchronized across all processors that share the PetscDraw object 716 */ 717 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 if (!rank) { 719 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 720 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 721 ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr); 722 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 723 } 724 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 725 ierr = MatDestroy(&A);CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatView_MPISBAIJ" 732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 733 { 734 PetscErrorCode ierr; 735 PetscBool iascii,isdraw,issocket,isbinary; 736 737 PetscFunctionBegin; 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 739 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 740 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 741 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 742 if (iascii || isdraw || issocket || isbinary) { 743 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatDestroy_MPISBAIJ" 750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 751 { 752 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 #if 
defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
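/*
   The matrix-vector products below exploit the symmetric storage: with the local part split as
   [A B] (A the diagonal SBAIJ block, B the off-diagonal BAIJ block), y = M*x is formed from
   A*x_local, B applied to gathered ghost values, and B^T (or B^H in the Hermitian variant)
   applied to x_local, combined through the slvec0/slvec1 work vectors and the sMvctx scatter
   with ADD_VALUES.
*/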
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
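/*
   The _2comm variants below use the BAIJ-style Mvctx scatter instead of the combined slvec
   scatter: gather ghost x into lvec, add the superdiagonal contribution B*lvec, then
   scatter-add B^T*x back to the owning processes for the subdiagonal contribution.
*/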
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
1004 */ 1005 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1006 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1007 PetscInt max = 1,mbs = mat->mbs,tmp; 1008 for (i=0; i<mbs; i++) { 1009 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1010 if (max < tmp) max = tmp; 1011 } 1012 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1013 } 1014 1015 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1016 lrow = row - brstart; /* local row index */ 1017 1018 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1019 if (!v) {pvA = 0; pvB = 0;} 1020 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1021 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1022 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1023 nztot = nzA + nzB; 1024 1025 cmap = mat->garray; 1026 if (v || idx) { 1027 if (nztot) { 1028 /* Sort by increasing column numbers, assuming A and B already sorted */ 1029 PetscInt imark = -1; 1030 if (v) { 1031 *v = v_p = mat->rowvalues; 1032 for (i=0; i<nzB; i++) { 1033 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1034 else break; 1035 } 1036 imark = i; 1037 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1038 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1039 } 1040 if (idx) { 1041 *idx = idx_p = mat->rowindices; 1042 if (imark > -1) { 1043 for (i=0; i<imark; i++) { 1044 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1045 } 1046 } else { 1047 for (i=0; i<nzB; i++) { 1048 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1049 else break; 1050 } 1051 imark = i; 1052 } 1053 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1054 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1055 } 1056 } else { 1057 if (idx) *idx = 0; 1058 if (v) *v = 0; 1059 } 1060 } 1061 *nz = nztot; 1062 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1063 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1069 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1070 { 1071 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1072 1073 PetscFunctionBegin; 1074 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1075 baij->getrowactive = PETSC_FALSE; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1081 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1082 { 1083 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1084 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1085 1086 PetscFunctionBegin; 1087 aA->getrow_utriangular = PETSC_TRUE; 1088 PetscFunctionReturn(0); 1089 } 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1092 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1093 { 1094 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1095 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1096 1097 PetscFunctionBegin; 1098 aA->getrow_utriangular = PETSC_FALSE; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1104 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1105 { 1106 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1111 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1117 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1118 { 1119 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1124 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1130 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1131 { 1132 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1137 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1143 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1144 { 1145 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1146 Mat A = a->A,B = a->B; 1147 PetscErrorCode ierr; 1148 PetscReal isend[5],irecv[5]; 1149 1150 PetscFunctionBegin; 1151 info->block_size = (PetscReal)matin->rmap->bs; 1152 1153 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1154 1155 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1156 isend[3] = info->memory; isend[4] = info->mallocs; 1157 1158 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1159 1160 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1161 isend[3] += info->memory; isend[4] += info->mallocs; 1162 if (flag == MAT_LOCAL) { 1163 info->nz_used = isend[0]; 1164 info->nz_allocated = isend[1]; 1165 info->nz_unneeded = isend[2]; 1166 info->memory = isend[3]; 1167 info->mallocs = isend[4]; 1168 } else if (flag == MAT_GLOBAL_MAX) { 1169 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1170 1171 info->nz_used = irecv[0]; 1172 info->nz_allocated = irecv[1]; 1173 info->nz_unneeded = irecv[2]; 1174 info->memory = irecv[3]; 1175 info->mallocs = irecv[4]; 1176 } else if (flag == MAT_GLOBAL_SUM) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1178 1179 info->nz_used = irecv[0]; 1180 info->nz_allocated = irecv[1]; 1181 info->nz_unneeded = irecv[2]; 1182 info->memory = irecv[3]; 1183 info->mallocs = irecv[4]; 1184 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1185 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1186 info->fill_ratio_needed = 0; 1187 info->factor_mallocs = 0; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1193 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1194 { 1195 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1196 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 switch (op) { 1201 case MAT_NEW_NONZERO_LOCATIONS: 1202 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1203 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1204 case MAT_KEEP_NONZERO_PATTERN: 1205 case MAT_NEW_NONZERO_LOCATION_ERR: 1206 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1208 break; 1209 case MAT_ROW_ORIENTED: 1210 a->roworiented = flg; 1211 1212 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1213 ierr = 
MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ 
"MatSetUnfactored_MPISBAIJ" 1317 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1318 { 1319 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatEqual_MPISBAIJ" 1331 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1332 { 1333 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1334 Mat a,b,c,d; 1335 PetscBool flg; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 a = matA->A; b = matA->B; 1340 c = matB->A; d = matB->B; 1341 1342 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1343 if (flg) { 1344 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1345 } 1346 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatCopy_MPISBAIJ" 1352 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1353 { 1354 PetscErrorCode ierr; 1355 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1356 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1357 1358 PetscFunctionBegin; 1359 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1360 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1361 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1362 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1363 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1364 } else { 1365 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1366 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1373 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1374 { 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1384 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1385 { 1386 PetscErrorCode ierr; 1387 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1388 PetscBLASInt bnz,one=1; 1389 Mat_SeqSBAIJ *xa,*ya; 1390 Mat_SeqBAIJ *xb,*yb; 1391 1392 PetscFunctionBegin; 1393 if (str == SAME_NONZERO_PATTERN) { 1394 PetscScalar alpha = a; 1395 xa = (Mat_SeqSBAIJ*)xx->A->data; 1396 ya = (Mat_SeqSBAIJ*)yy->A->data; 1397 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1398 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1399 xb = (Mat_SeqBAIJ*)xx->B->data; 1400 yb = (Mat_SeqBAIJ*)yy->B->data; 1401 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1402 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1403 } else { 1404 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1405 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1406 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1414 { 1415 PetscErrorCode ierr; 1416 PetscInt i; 1417 PetscBool flg; 1418 1419 PetscFunctionBegin; 1420 for (i=0; i<n; i++) { 1421 ierr = 
ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1422 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1423 } 1424 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 1429 /* -------------------------------------------------------------------*/ 1430 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1431 MatGetRow_MPISBAIJ, 1432 MatRestoreRow_MPISBAIJ, 1433 MatMult_MPISBAIJ, 1434 /* 4*/ MatMultAdd_MPISBAIJ, 1435 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1436 MatMultAdd_MPISBAIJ, 1437 0, 1438 0, 1439 0, 1440 /* 10*/ 0, 1441 0, 1442 0, 1443 MatSOR_MPISBAIJ, 1444 MatTranspose_MPISBAIJ, 1445 /* 15*/ MatGetInfo_MPISBAIJ, 1446 MatEqual_MPISBAIJ, 1447 MatGetDiagonal_MPISBAIJ, 1448 MatDiagonalScale_MPISBAIJ, 1449 MatNorm_MPISBAIJ, 1450 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1451 MatAssemblyEnd_MPISBAIJ, 1452 MatSetOption_MPISBAIJ, 1453 MatZeroEntries_MPISBAIJ, 1454 /* 24*/ 0, 1455 0, 1456 0, 1457 0, 1458 0, 1459 /* 29*/ MatSetUp_MPISBAIJ, 1460 0, 1461 0, 1462 0, 1463 0, 1464 /* 34*/ MatDuplicate_MPISBAIJ, 1465 0, 1466 0, 1467 0, 1468 0, 1469 /* 39*/ MatAXPY_MPISBAIJ, 1470 MatGetSubMatrices_MPISBAIJ, 1471 MatIncreaseOverlap_MPISBAIJ, 1472 MatGetValues_MPISBAIJ, 1473 MatCopy_MPISBAIJ, 1474 /* 44*/ 0, 1475 MatScale_MPISBAIJ, 1476 0, 1477 0, 1478 0, 1479 /* 49*/ 0, 1480 0, 1481 0, 1482 0, 1483 0, 1484 /* 54*/ 0, 1485 0, 1486 MatSetUnfactored_MPISBAIJ, 1487 0, 1488 MatSetValuesBlocked_MPISBAIJ, 1489 /* 59*/ 0, 1490 0, 1491 0, 1492 0, 1493 0, 1494 /* 64*/ 0, 1495 0, 1496 0, 1497 0, 1498 0, 1499 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1500 0, 1501 0, 1502 0, 1503 0, 1504 /* 74*/ 0, 1505 0, 1506 0, 1507 0, 1508 0, 1509 /* 79*/ 0, 1510 0, 1511 0, 1512 0, 1513 MatLoad_MPISBAIJ, 1514 /* 84*/ 0, 1515 0, 1516 0, 1517 0, 1518 0, 1519 /* 89*/ 0, 1520 0, 1521 0, 1522 0, 1523 0, 1524 /* 94*/ 0, 1525 0, 1526 0, 1527 0, 1528 0, 1529 /* 99*/ 0, 1530 0, 1531 0, 1532 0, 1533 0, 1534 /*104*/ 0, 1535 MatRealPart_MPISBAIJ, 1536 MatImaginaryPart_MPISBAIJ, 1537 MatGetRowUpperTriangular_MPISBAIJ, 1538 MatRestoreRowUpperTriangular_MPISBAIJ, 1539 /*109*/ 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 /*114*/ 0, 1545 0, 1546 0, 1547 0, 1548 0, 1549 /*119*/ 0, 1550 0, 1551 0, 1552 0, 1553 0, 1554 /*124*/ 0, 1555 0, 1556 0, 1557 0, 1558 0, 1559 /*129*/ 0, 1560 0, 1561 0, 1562 0, 1563 0, 1564 /*134*/ 0, 1565 0, 1566 0, 1567 0, 1568 0, 1569 /*139*/ 0, 1570 0, 1571 0 1572 }; 1573 1574 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1577 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1578 { 1579 PetscFunctionBegin; 1580 *a = ((Mat_MPISBAIJ*)A->data)->A; 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1586 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1587 { 1588 Mat_MPISBAIJ *b; 1589 PetscErrorCode ierr; 1590 PetscInt i,mbs,Mbs; 1591 1592 PetscFunctionBegin; 1593 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1594 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1595 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1596 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1597 1598 b = (Mat_MPISBAIJ*)B->data; 1599 mbs = B->rmap->n/bs; 1600 Mbs = B->rmap->N/bs; 1601 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by 
blocksize %D",B->rmap->N,bs); 1602 1603 B->rmap->bs = bs; 1604 b->bs2 = bs*bs; 1605 b->mbs = mbs; 1606 b->nbs = mbs; 1607 b->Mbs = Mbs; 1608 b->Nbs = Mbs; 1609 1610 for (i=0; i<=b->size; i++) { 1611 b->rangebs[i] = B->rmap->range[i]/bs; 1612 } 1613 b->rstartbs = B->rmap->rstart/bs; 1614 b->rendbs = B->rmap->rend/bs; 1615 1616 b->cstartbs = B->cmap->rstart/bs; 1617 b->cendbs = B->cmap->rend/bs; 1618 1619 if (!B->preallocated) { 1620 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1621 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1622 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1623 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1624 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1625 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1626 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1627 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1628 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1629 } 1630 1631 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1632 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1633 1634 B->preallocated = PETSC_TRUE; 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1640 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1641 { 1642 PetscInt m,rstart,cstart,cend; 1643 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1644 const PetscInt *JJ =0; 1645 PetscScalar *values=0; 1646 PetscErrorCode ierr; 1647 1648 PetscFunctionBegin; 1649 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1650 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1651 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1652 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1653 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1654 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1655 m = B->rmap->n/bs; 1656 rstart = B->rmap->rstart/bs; 1657 cstart = B->cmap->rstart/bs; 1658 cend = B->cmap->rend/bs; 1659 1660 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1661 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1662 for (i=0; i<m; i++) { 1663 nz = ii[i+1] - ii[i]; 1664 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1665 nz_max = PetscMax(nz_max,nz); 1666 JJ = jj + ii[i]; 1667 for (j=0; j<nz; j++) { 1668 if (*JJ >= cstart) break; 1669 JJ++; 1670 } 1671 d = 0; 1672 for (; j<nz; j++) { 1673 if (*JJ++ >= cend) break; 1674 d++; 1675 } 1676 d_nnz[i] = d; 1677 o_nnz[i] = nz - d; 1678 } 1679 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1680 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1681 1682 values = (PetscScalar*)V; 1683 if (!values) { 1684 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1685 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1686 } 1687 for (i=0; i<m; i++) { 1688 PetscInt row = i + rstart; 1689 PetscInt ncols = ii[i+1] - ii[i]; 1690 const PetscInt *icols = jj + ii[i]; 1691 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 1692 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1693 } 1694 1695 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1696 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1698 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #if defined(PETSC_HAVE_MUMPS) 1703 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1704 #endif 1705 #if defined(PETSC_HAVE_PASTIX) 1706 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1707 #endif 1708 1709 /*MC 1710 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1711 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1712 the matrix is stored. 1713 1714 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1715 can call MatSetOption(Mat, MAT_HERMITIAN); 1716 1717 Options Database Keys: 1718 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1719 1720 Level: beginner 1721 1722 .seealso: MatCreateMPISBAIJ 1723 M*/ 1724 1725 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatCreate_MPISBAIJ" 1729 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1730 { 1731 Mat_MPISBAIJ *b; 1732 PetscErrorCode ierr; 1733 PetscBool flg; 1734 1735 PetscFunctionBegin; 1736 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1737 B->data = (void*)b; 1738 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1739 1740 B->ops->destroy = MatDestroy_MPISBAIJ; 1741 B->ops->view = MatView_MPISBAIJ; 1742 B->assembled = PETSC_FALSE; 1743 B->insertmode = NOT_SET_VALUES; 1744 1745 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1746 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1747 1748 /* build local table of row and column ownerships */ 1749 ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr); 1750 1751 /* build cache for off array entries formed */ 1752 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1753 1754 b->donotstash = PETSC_FALSE; 1755 b->colmap = NULL; 1756 b->garray = NULL; 1757 b->roworiented = PETSC_TRUE; 1758 1759 /* stuff used in block assembly */ 1760 b->barray = 0; 1761 1762 /* stuff used for matrix vector multiply */ 1763 b->lvec = 0; 1764 b->Mvctx = 0; 1765 b->slvec0 = 0; 1766 b->slvec0b = 0; 1767 b->slvec1 = 0; 1768 b->slvec1a = 0; 1769 b->slvec1b = 0; 1770 b->sMvctx = 0; 1771 1772 /* stuff for MatGetRow() */ 1773 b->rowindices = 0; 1774 b->rowvalues = 0; 1775 b->getrowactive = PETSC_FALSE; 1776 1777 /* hash table stuff */ 1778 b->ht = 0; 1779 b->hd = 0; 1780 b->ht_size = 0; 1781 b->ht_flag = PETSC_FALSE; 1782 b->ht_fact = 0; 1783 b->ht_total_ct = 0; 1784 b->ht_insert_ct = 0; 1785 1786 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1787 b->ijonly = PETSC_FALSE; 1788 1789 b->in_loc = 0; 1790 b->v_loc = 0; 1791 b->n_loc = 0; 1792 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1793 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing 
matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 1794 if (flg) { 1795 PetscReal fact = 1.39; 1796 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1797 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1798 if (fact <= 1.0) fact = 1.39; 1799 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1800 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1801 } 1802 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1803 1804 #if defined(PETSC_HAVE_PASTIX) 1805 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1806 #endif 1807 #if defined(PETSC_HAVE_MUMPS) 1808 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1809 #endif 1810 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1811 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1812 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1813 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1814 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1816 1817 B->symmetric = PETSC_TRUE; 1818 B->structurally_symmetric = PETSC_TRUE; 1819 B->symmetric_set = PETSC_TRUE; 1820 B->structurally_symmetric_set = PETSC_TRUE; 1821 1822 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 /*MC 1827 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1828 1829 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1830 and MATMPISBAIJ otherwise. 1831 1832 Options Database Keys: 1833 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1834 1835 Level: beginner 1836 1837 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1838 M*/ 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1842 /*@C 1843 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1844 the user should preallocate the matrix storage by setting the parameters 1845 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1846 performance can be increased by more than a factor of 50. 1847 1848 Collective on Mat 1849 1850 Input Parameters: 1851 + A - the matrix 1852 . bs - size of blockk 1853 . d_nz - number of block nonzeros per block row in diagonal portion of local 1854 submatrix (same for all local rows) 1855 . d_nnz - array containing the number of block nonzeros in the various block rows 1856 in the upper triangular and diagonal part of the in diagonal portion of the local 1857 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1858 for the diagonal entry and set a value even if it is zero. 1859 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1860 submatrix (same for all local rows). 
1861 - o_nnz - array containing the number of nonzeros in the various block rows of the 1862 off-diagonal portion of the local submatrix that is right of the diagonal 1863 (possibly different for each block row) or NULL. 1864 1865 1866 Options Database Keys: 1867 . -mat_no_unroll - uses code that does not unroll the loops in the 1868 block calculations (much slower) 1869 . -mat_block_size - size of the blocks to use 1870 1871 Notes: 1872 1873 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1874 then it must be used on all processors that share the object for that argument. 1875 1876 If the *_nnz parameter is given then the *_nz parameter is ignored. 1877 1878 Storage Information: 1879 For a square global matrix we define each processor's diagonal portion 1880 to be its local rows and the corresponding columns (a square submatrix); 1881 each processor's off-diagonal portion encompasses the remainder of the 1882 local matrix (a rectangular submatrix). 1883 1884 The user can specify preallocated storage for the diagonal part of 1885 the local submatrix with either d_nz or d_nnz (not both). Set 1886 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1887 memory allocation. Likewise, specify preallocated storage for the 1888 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1889 1890 You can call MatGetInfo() to get information on how effective the preallocation was; 1891 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1892 You can also run with the option -info and look for messages with the string 1893 malloc in them to see if additional memory allocation was needed. 1894 1895 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1896 the figure below we depict these three local rows and all columns (0-11). 1897 1898 .vb 1899 0 1 2 3 4 5 6 7 8 9 10 11 1900 -------------------------- 1901 row 3 |. . . d d d o o o o o o 1902 row 4 |. . . d d d o o o o o o 1903 row 5 |. . . d d d o o o o o o 1904 -------------------------- 1905 .ve 1906 1907 Thus, any entries in the d locations are stored in the d (diagonal) 1908 submatrix, and any entries in the o locations are stored in the 1909 o (off-diagonal) submatrix. Note that the d matrix is stored in 1910 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 1911 1912 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1913 plus the diagonal part of the d matrix, 1914 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1915 1916 In general, for PDE problems in which most nonzeros are near the diagonal, 1917 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1918 or you will get TERRIBLE performance; see the users' manual chapter on 1919 matrices.
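
   A minimal usage sketch (the communicator, global size M, and nonzero counts below are illustrative only, not taken from this file):

.vb
      Mat  A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
      MatSetType(A,MATMPISBAIJ);
      MatSetFromOptions(A);
      MatMPISBAIJSetPreallocation(A,2,3,NULL,1,NULL);
.ve

   Here bs=2, roughly 3 block nonzeros per block row are expected in the upper triangular part of the
   diagonal portion and roughly 1 in the off-diagonal portion; pass d_nnz/o_nnz arrays instead of the
   scalar counts when the counts differ between block rows.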
1920 1921 Level: intermediate 1922 1923 .keywords: matrix, block, aij, compressed row, sparse, parallel 1924 1925 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 1926 @*/ 1927 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1928 { 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1933 PetscValidType(B,1); 1934 PetscValidLogicalCollectiveInt(B,bs,2); 1935 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatCreateSBAIJ" 1941 /*@C 1942 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1943 (block compressed row). For good matrix assembly performance 1944 the user should preallocate the matrix storage by setting the parameters 1945 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1946 performance can be increased by more than a factor of 50. 1947 1948 Collective on MPI_Comm 1949 1950 Input Parameters: 1951 + comm - MPI communicator 1952 . bs - size of block 1953 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1954 This value should be the same as the local size used in creating the 1955 y vector for the matrix-vector product y = Ax. 1956 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1957 This value should be the same as the local size used in creating the 1958 x vector for the matrix-vector product y = Ax. 1959 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1960 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1961 . d_nz - number of block nonzeros per block row in diagonal portion of local 1962 submatrix (same for all local rows) 1963 . d_nnz - array containing the number of block nonzeros in the various block rows 1964 in the upper triangular portion of the diagonal portion of the local submatrix 1965 (possibly different for each block row) or NULL. 1966 If you plan to factor the matrix you must leave room for the diagonal entry and 1967 set its value even if it is zero. 1968 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1969 submatrix (same for all local rows). 1970 - o_nnz - array containing the number of nonzeros in the various block rows of the 1971 off-diagonal portion of the local submatrix (possibly different for 1972 each block row) or NULL. 1973 1974 Output Parameter: 1975 . A - the matrix 1976 1977 Options Database Keys: 1978 . -mat_no_unroll - uses code that does not unroll the loops in the 1979 block calculations (much slower) 1980 . -mat_block_size - size of the blocks to use 1981 . -mat_mpi - use the parallel matrix data structures even on one processor 1982 (defaults to using SeqSBAIJ format on one processor) 1983 1984 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1985 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1986 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1987 1988 Notes: 1989 The number of rows and columns must be divisible by blocksize. 1990 This matrix type does not support complex Hermitian operations.
1991 1992 The user MUST specify either the local or global matrix dimensions 1993 (possibly both). 1994 1995 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1996 then it must be used on all processors that share the object for that argument. 1997 1998 If the *_nnz parameter is given then the *_nz parameter is ignored. 1999 2000 Storage Information: 2001 For a square global matrix we define each processor's diagonal portion 2002 to be its local rows and the corresponding columns (a square submatrix); 2003 each processor's off-diagonal portion encompasses the remainder of the 2004 local matrix (a rectangular submatrix). 2005 2006 The user can specify preallocated storage for the diagonal part of 2007 the local submatrix with either d_nz or d_nnz (not both). Set 2008 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2009 memory allocation. Likewise, specify preallocated storage for the 2010 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2011 2012 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2013 the figure below we depict these three local rows and all columns (0-11). 2014 2015 .vb 2016 0 1 2 3 4 5 6 7 8 9 10 11 2017 -------------------------- 2018 row 3 |. . . d d d o o o o o o 2019 row 4 |. . . d d d o o o o o o 2020 row 5 |. . . d d d o o o o o o 2021 -------------------------- 2022 .ve 2023 2024 Thus, any entries in the d locations are stored in the d (diagonal) 2025 submatrix, and any entries in the o locations are stored in the 2026 o (off-diagonal) submatrix. Note that the d matrix is stored in 2027 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 2028 2029 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2030 plus the diagonal part of the d matrix, 2031 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2032 In general, for PDE problems in which most nonzeros are near the diagonal, 2033 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2034 or you will get TERRIBLE performance; see the users' manual chapter on 2035 matrices.
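
   A minimal usage sketch (the global size M and the nonzero counts below are illustrative only, not taken from this file):

.vb
      Mat A;
      MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,5,NULL,2,NULL,&A);
.ve

   followed by the usual MatSetValuesBlocked()/MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls.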
2036 2037 Level: intermediate 2038 2039 .keywords: matrix, block, aij, compressed row, sparse, parallel 2040 2041 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2042 @*/ 2043 2044 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2045 { 2046 PetscErrorCode ierr; 2047 PetscMPIInt size; 2048 2049 PetscFunctionBegin; 2050 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2051 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2052 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2053 if (size > 1) { 2054 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2055 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2056 } else { 2057 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2058 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2059 } 2060 PetscFunctionReturn(0); 2061 } 2062 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2066 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2067 { 2068 Mat mat; 2069 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2070 PetscErrorCode ierr; 2071 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2072 PetscScalar *array; 2073 2074 PetscFunctionBegin; 2075 *newmat = 0; 2076 2077 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2078 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2079 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2080 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2081 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2082 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2083 2084 mat->factortype = matin->factortype; 2085 mat->preallocated = PETSC_TRUE; 2086 mat->assembled = PETSC_TRUE; 2087 mat->insertmode = NOT_SET_VALUES; 2088 2089 a = (Mat_MPISBAIJ*)mat->data; 2090 a->bs2 = oldmat->bs2; 2091 a->mbs = oldmat->mbs; 2092 a->nbs = oldmat->nbs; 2093 a->Mbs = oldmat->Mbs; 2094 a->Nbs = oldmat->Nbs; 2095 2096 2097 a->size = oldmat->size; 2098 a->rank = oldmat->rank; 2099 a->donotstash = oldmat->donotstash; 2100 a->roworiented = oldmat->roworiented; 2101 a->rowindices = 0; 2102 a->rowvalues = 0; 2103 a->getrowactive = PETSC_FALSE; 2104 a->barray = 0; 2105 a->rstartbs = oldmat->rstartbs; 2106 a->rendbs = oldmat->rendbs; 2107 a->cstartbs = oldmat->cstartbs; 2108 a->cendbs = oldmat->cendbs; 2109 2110 /* hash table stuff */ 2111 a->ht = 0; 2112 a->hd = 0; 2113 a->ht_size = 0; 2114 a->ht_flag = oldmat->ht_flag; 2115 a->ht_fact = oldmat->ht_fact; 2116 a->ht_total_ct = 0; 2117 a->ht_insert_ct = 0; 2118 2119 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2120 if (oldmat->colmap) { 2121 #if defined(PETSC_USE_CTABLE) 2122 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2123 #else 2124 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 2125 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2126 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2127 #endif 2128 } else a->colmap = 0; 2129 2130 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2131 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2132 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2133 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2134 } else a->garray = 0; 2135 2136 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2137 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2138 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2139 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2140 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2141 2142 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2143 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2144 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2145 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2146 2147 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2148 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2149 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2150 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2151 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2152 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2153 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2154 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2155 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2156 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2157 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2158 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2159 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2160 2161 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2162 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2163 a->sMvctx = oldmat->sMvctx; 2164 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2165 2166 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2167 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2168 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2169 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2170 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2171 *newmat = mat; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatLoad_MPISBAIJ" 2177 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2178 { 2179 PetscErrorCode ierr; 2180 PetscInt i,nz,j,rstart,rend; 2181 PetscScalar *vals,*buf; 2182 MPI_Comm comm; 2183 MPI_Status status; 2184 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2185 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2186 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2187 PetscInt bs =1,Mbs,mbs,extra_rows; 2188 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2189 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2190 int fd; 2191 2192 PetscFunctionBegin; 2193 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2194 ierr 
= PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2195 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2196 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2197 2198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2199 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2200 if (!rank) { 2201 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2202 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2203 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2204 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2205 } 2206 2207 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2208 2209 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2210 M = header[1]; 2211 N = header[2]; 2212 2213 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2214 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2215 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2216 2217 /* If global sizes are set, check if they are consistent with that given in the file */ 2218 if (sizesset) { 2219 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2220 } 2221 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2222 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2223 2224 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2225 2226 /* 2227 This code adds extra rows to make sure the number of rows is 2228 divisible by the blocksize 2229 */ 2230 Mbs = M/bs; 2231 extra_rows = bs - M + bs*(Mbs); 2232 if (extra_rows == bs) extra_rows = 0; 2233 else Mbs++; 2234 if (extra_rows &&!rank) { 2235 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2236 } 2237 2238 /* determine ownership of all rows */ 2239 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2240 mbs = Mbs/size + ((Mbs % size) > rank); 2241 m = mbs*bs; 2242 } else { /* User Set */ 2243 m = newmat->rmap->n; 2244 mbs = m/bs; 2245 } 2246 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2247 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2248 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2249 rowners[0] = 0; 2250 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2251 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2252 rstart = rowners[rank]; 2253 rend = rowners[rank+1]; 2254 2255 /* distribute row lengths to all processors */ 2256 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2257 if (!rank) { 2258 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 2259 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2260 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2261 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2262 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2263 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2264 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2265 } else { 
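/* non-root ranks: the send arguments of MPI_Scatterv() are ignored away from the root, so just receive this process's share of the row lengths */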
2266 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2267 } 2268 2269 if (!rank) { /* procs[0] */ 2270 /* calculate the number of nonzeros on each processor */ 2271 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2272 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2273 for (i=0; i<size; i++) { 2274 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2275 procsnz[i] += rowlengths[j]; 2276 } 2277 } 2278 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2279 2280 /* determine max buffer needed and allocate it */ 2281 maxnz = 0; 2282 for (i=0; i<size; i++) { 2283 maxnz = PetscMax(maxnz,procsnz[i]); 2284 } 2285 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2286 2287 /* read in my part of the matrix column indices */ 2288 nz = procsnz[0]; 2289 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2290 mycols = ibuf; 2291 if (size == 1) nz -= extra_rows; 2292 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2293 if (size == 1) { 2294 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2295 } 2296 2297 /* read in every ones (except the last) and ship off */ 2298 for (i=1; i<size-1; i++) { 2299 nz = procsnz[i]; 2300 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2301 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2302 } 2303 /* read in the stuff for the last proc */ 2304 if (size != 1) { 2305 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2306 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2307 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2308 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2309 } 2310 ierr = PetscFree(cols);CHKERRQ(ierr); 2311 } else { /* procs[i], i>0 */ 2312 /* determine buffer space needed for message */ 2313 nz = 0; 2314 for (i=0; i<m; i++) nz += locrowlens[i]; 2315 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2316 mycols = ibuf; 2317 /* receive message of column indices*/ 2318 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2319 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2320 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2321 } 2322 2323 /* loop over local rows, determining number of off diagonal entries */ 2324 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2325 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2326 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2327 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2328 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2329 rowcount = 0; 2330 nzcount = 0; 2331 for (i=0; i<mbs; i++) { 2332 dcount = 0; 2333 odcount = 0; 2334 for (j=0; j<bs; j++) { 2335 kmax = locrowlens[rowcount]; 2336 for (k=0; k<kmax; k++) { 2337 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2338 if (!mask[tmp]) { 2339 mask[tmp] = 1; 2340 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2341 else masked1[dcount++] = tmp; /* entry in diag portion */ 2342 } 2343 } 2344 rowcount++; 2345 } 2346 2347 dlens[i] = dcount; /* d_nzz[i] */ 2348 odlens[i] = odcount; /* o_nzz[i] */ 2349 2350 /* zero out the mask elements we set */ 2351 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2352 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2353 } 2354 if (!sizesset) { 2355 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2356 } 2357 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2358 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2359 2360 if (!rank) { 2361 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2362 /* read in my part of the matrix numerical values */ 2363 nz = procsnz[0]; 2364 vals = buf; 2365 mycols = ibuf; 2366 if (size == 1) nz -= extra_rows; 2367 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2368 if (size == 1) { 2369 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2370 } 2371 2372 /* insert into matrix */ 2373 jj = rstart*bs; 2374 for (i=0; i<m; i++) { 2375 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2376 mycols += locrowlens[i]; 2377 vals += locrowlens[i]; 2378 jj++; 2379 } 2380 2381 /* read in other processors (except the last one) and ship out */ 2382 for (i=1; i<size-1; i++) { 2383 nz = procsnz[i]; 2384 vals = buf; 2385 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2386 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2387 } 2388 /* the last proc */ 2389 if (size != 1) { 2390 nz = procsnz[i] - extra_rows; 2391 vals = buf; 2392 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2393 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2394 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2395 } 2396 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2397 2398 } else { 2399 /* receive numeric values */ 2400 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2401 2402 /* receive message of values*/ 2403 vals = buf; 2404 mycols = ibuf; 2405 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2406 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2407 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2408 2409 /* insert into matrix */ 2410 jj = rstart*bs; 2411 for (i=0; i<m; i++) { 2412 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2413 mycols += locrowlens[i]; 2414 vals += locrowlens[i]; 2415 jj++; 2416 } 2417 } 2418 2419 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2420 ierr = PetscFree(buf);CHKERRQ(ierr); 2421 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2422 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2423 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2424 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2425 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2426 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2432 /*XXXXX@ 2433 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2434 2435 Input Parameters: 2436 . 
mat - the matrix 2437 . fact - factor 2438 2439 Not Collective on Mat, each process can have a different hash factor 2440 2441 Level: advanced 2442 2443 Notes: 2444 This can also be set by the command line option: -mat_use_hash_table fact 2445 2446 .keywords: matrix, hashtable, factor, HT 2447 2448 .seealso: MatSetOption() 2449 @XXXXX*/ 2450 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2454 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2455 { 2456 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2457 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2458 PetscReal atmp; 2459 PetscReal *work,*svalues,*rvalues; 2460 PetscErrorCode ierr; 2461 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2462 PetscMPIInt rank,size; 2463 PetscInt *rowners_bs,dest,count,source; 2464 PetscScalar *va; 2465 MatScalar *ba; 2466 MPI_Status stat; 2467 2468 PetscFunctionBegin; 2469 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2470 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2471 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2472 2473 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2474 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2475 2476 bs = A->rmap->bs; 2477 mbs = a->mbs; 2478 Mbs = a->Mbs; 2479 ba = b->a; 2480 bi = b->i; 2481 bj = b->j; 2482 2483 /* find ownerships */ 2484 rowners_bs = A->rmap->range; 2485 2486 /* each proc creates an array to be distributed */ 2487 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2488 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2489 2490 /* row_max for B */ 2491 if (rank != size-1) { 2492 for (i=0; i<mbs; i++) { 2493 ncols = bi[1] - bi[0]; bi++; 2494 brow = bs*i; 2495 for (j=0; j<ncols; j++) { 2496 bcol = bs*(*bj); 2497 for (kcol=0; kcol<bs; kcol++) { 2498 col = bcol + kcol; /* local col index */ 2499 col += rowners_bs[rank+1]; /* global col index */ 2500 for (krow=0; krow<bs; krow++) { 2501 atmp = PetscAbsScalar(*ba); ba++; 2502 row = brow + krow; /* local row index */ 2503 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2504 if (work[col] < atmp) work[col] = atmp; 2505 } 2506 } 2507 bj++; 2508 } 2509 } 2510 2511 /* send values to its owners */ 2512 for (dest=rank+1; dest<size; dest++) { 2513 svalues = work + rowners_bs[dest]; 2514 count = rowners_bs[dest+1]-rowners_bs[dest]; 2515 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2516 } 2517 } 2518 2519 /* receive values */ 2520 if (rank) { 2521 rvalues = work; 2522 count = rowners_bs[rank+1]-rowners_bs[rank]; 2523 for (source=0; source<rank; source++) { 2524 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2525 /* process values */ 2526 for (i=0; i<count; i++) { 2527 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2528 } 2529 } 2530 } 2531 2532 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2533 ierr = PetscFree(work);CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatSOR_MPISBAIJ" 2539 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2540 { 2541 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2542 PetscErrorCode ierr; 2543 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2544 PetscScalar *x,*ptr,*from; 2545 Vec bb1; 2546 const PetscScalar *b; 2547 2548 PetscFunctionBegin; 2549 if (its 
<= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2550 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2551 2552 if (flag == SOR_APPLY_UPPER) { 2553 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2558 if (flag & SOR_ZERO_INITIAL_GUESS) { 2559 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2560 its--; 2561 } 2562 2563 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2564 while (its--) { 2565 2566 /* lower triangular part: slvec0b = - B^T*xx */ 2567 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2568 2569 /* copy xx into slvec0a */ 2570 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2571 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2572 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2573 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2574 2575 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2576 2577 /* copy bb into slvec1a */ 2578 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2579 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2580 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2581 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2582 2583 /* set slvec1b = 0 */ 2584 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2585 2586 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2587 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2588 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2589 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2590 2591 /* upper triangular part: bb1 = bb1 - B*x */ 2592 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2593 2594 /* local diagonal sweep */ 2595 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2596 } 2597 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2598 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2600 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2602 } else if (flag & SOR_EISENSTAT) { 2603 Vec xx1; 2604 PetscBool hasop; 2605 const PetscScalar *diag; 2606 PetscScalar *sl,scale = (omega - 2.0)/omega; 2607 PetscInt i,n; 2608 2609 if (!mat->xx1) { 2610 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2611 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2612 } 2613 xx1 = mat->xx1; 2614 bb1 = mat->bb1; 2615 2616 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2617 2618 if (!mat->diag) { 2619 /* this is wrong for same matrix with new nonzero values */ 2620 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2621 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2622 } 2623 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2624 2625 if (hasop) { 2626 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2627 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2628 } else { 2629 
/* 2630 These two lines are replaced by code that may be a bit faster for a good compiler 2631 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2632 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2633 */ 2634 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2635 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2636 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2637 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2638 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2639 if (omega == 1.0) { 2640 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2641 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2642 } else { 2643 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2644 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2645 } 2646 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2647 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2648 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2649 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2650 } 2651 2652 /* multiply off-diagonal portion of matrix */ 2653 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2654 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2655 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2656 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2657 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2658 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2659 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2660 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2661 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2662 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2663 2664 /* local sweep */ 2665 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2666 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2667 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2668 PetscFunctionReturn(0); 2669 } 2670 2671 #undef __FUNCT__ 2672 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2673 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2674 { 2675 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2676 PetscErrorCode ierr; 2677 Vec lvec1,bb1; 2678 2679 PetscFunctionBegin; 2680 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2681 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2682 2683 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2684 if (flag & SOR_ZERO_INITIAL_GUESS) { 2685 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2686 its--; 2687 } 2688 2689 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2690 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2691 while (its--) { 2692 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2693 2694 /* lower diagonal part: bb1 = bb - B^T*xx */ 2695 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2696 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2697 2698 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2699 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 
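/* the reverse scatter adds the (already negated) B^T*xx contributions into bb1 on their owning processes */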
2700 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2701 2702 /* upper diagonal part: bb1 = bb1 - B*x */ 2703 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2704 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2705 2706 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2707 2708 /* diagonal sweep */ 2709 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2710 } 2711 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2712 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2713 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2714 PetscFunctionReturn(0); 2715 } 2716 2717 #undef __FUNCT__ 2718 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2719 /*@ 2720 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2721 CSR format the local rows. 2722 2723 Collective on MPI_Comm 2724 2725 Input Parameters: 2726 + comm - MPI communicator 2727 . bs - the block size, only a block size of 1 is supported 2728 . m - number of local rows (Cannot be PETSC_DECIDE) 2729 . n - This value should be the same as the local size used in creating the 2730 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2731 calculated if N is given) For square matrices n is almost always m. 2732 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2733 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2734 . i - row indices 2735 . j - column indices 2736 - a - matrix values 2737 2738 Output Parameter: 2739 . mat - the matrix 2740 2741 Level: intermediate 2742 2743 Notes: 2744 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2745 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2746 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2747 2748 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2749 2750 .keywords: matrix, aij, compressed row, sparse, parallel 2751 2752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2753 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2754 @*/ 2755 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2756 { 2757 PetscErrorCode ierr; 2758 2759 2760 PetscFunctionBegin; 2761 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2762 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2763 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2764 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2765 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2766 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2767 PetscFunctionReturn(0); 2768 } 2769 2770 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2773 /*@C 2774 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2775 (the default parallel PETSc format). 2776 2777 Collective on MPI_Comm 2778 2779 Input Parameters: 2780 + A - the matrix 2781 . bs - the block size 2782 . 
i - the indices into j for the start of each local row (starts with zero) 2783 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2784 - v - optional values in the matrix 2785 2786 Level: developer 2787 2788 .keywords: matrix, aij, compressed row, sparse, parallel 2789 2790 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2791 @*/ 2792 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2793 { 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 2802
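/*
   A minimal usage sketch for MatMPISBAIJSetPreallocationCSR(); the block-row CSR arrays ii, jj, and vv
   are illustrative placeholders here, not variables defined in this file:

      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,M,N);
      MatSetType(B,MATMPISBAIJ);
      MatMPISBAIJSetPreallocationCSR(B,bs,ii,jj,vv);

   vv may be NULL, in which case the nonzero pattern is preallocated and filled with zero values.
*/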