1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 #if defined(PETSC_HAVE_ELEMENTAL) 23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 24 #endif 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 28 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 29 { 30 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 35 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 36 PetscFunctionReturn(0); 37 } 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 53 { \ 54 \ 55 brow = row/bs; \ 56 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 57 rmax = aimax[brow]; nrow = ailen[brow]; \ 58 bcol = col/bs; \ 59 ridx = row % bs; cidx = col % bs; \ 60 low = 0; high = nrow; \ 61 while (high-low > 3) { \ 62 t = (low+high)/2; \ 63 if (rp[t] > bcol) high = t; \ 64 else low = t; \ 65 } \ 66 for (_i=low; _i<high; _i++) { \ 67 if (rp[_i] > bcol) break; \ 68 if (rp[_i] == bcol) { \ 69 bap = ap + bs2*_i + bs*cidx + ridx; \ 70 if (addv == ADD_VALUES) *bap += value; \ 71 else *bap = value; \ 72 goto a_noinsert; \ 73 } \ 74 } \ 75 if (a->nonew == 1) goto a_noinsert; \ 76 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 77 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 78 N = nrow++ - 1; \ 79 /* shift up all the later entries in this row */ \ 80 for (ii=N; ii>=_i; ii--) { \ 81 rp[ii+1] = rp[ii]; \ 82 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 83 } \ 84 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 85 rp[_i] = bcol; \ 86 ap[bs2*_i + bs*cidx + ridx] = value; \ 87 A->nonzerostate++;\ 88 a_noinsert:; \ 89 ailen[brow] = nrow; \ 90 } 91 92 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 93 { \ 94 brow = row/bs; \ 95 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 96 rmax = bimax[brow]; nrow = bilen[brow]; \ 97 bcol = col/bs; \ 98 ridx = row % bs; cidx = col % bs; \ 99 low = 0; high = nrow; \ 100 while (high-low > 3) { \ 101 t = (low+high)/2; \ 102 if (rp[t] > bcol) high = t; \ 103 else low = t; \ 104 } \ 105 for (_i=low; _i<high; _i++) { \ 106 if (rp[_i] > bcol) break; \ 107 if (rp[_i] == bcol) { \ 108 bap = ap + bs2*_i + bs*cidx + ridx; \ 109 if (addv == ADD_VALUES) *bap += value; \ 110 else *bap = value; \ 111 goto b_noinsert; \ 112 } \ 113 } \ 114 if (b->nonew == 1) goto b_noinsert; \ 115 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 116 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 117 N = nrow++ - 1; \ 118 /* shift up all the later entries in this row */ \ 119 for (ii=N; ii>=_i; ii--) { \ 120 rp[ii+1] = rp[ii]; \ 121 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 } \ 123 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 124 rp[_i] = bcol; \ 125 ap[bs2*_i + bs*cidx + ridx] = value; \ 126 B->nonzerostate++;\ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ignored.
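   For illustration only (a hedged example, not part of the original source), with bs = 1:
      MatSetValue(mat,0,2,v,ADD_VALUES);   i <= j: handled normally
      MatSetValue(mat,2,0,v,ADD_VALUES);   i >  j: silently dropped when MAT_IGNORE_LOWER_TRIANGULAR
                                           is set, otherwise an error is generated (see
                                           MatSetValues_MPISBAIJ() below).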
133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa =a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba =b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = NULL; 164 MatScalar *v_loc = NULL; 165 166 PetscFunctionBegin; 167 if (!baij->donotstash) { 168 if (n > baij->n_loc) { 169 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 170 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 171 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 173 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 192 } 193 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 194 col = in[j] - cstart_orig; /* local col index */ 195 brow = row/bs; bcol = col/bs; 196 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 197 if (roworiented) value = v[i*n+j]; 198 else value = v[i+j*m]; 199 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 200 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 201 } else if (in[j] < 0) continue; 202 #if defined(PETSC_USE_DEBUG) 203 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 204 #endif 205 else { /* off-diag entry (B) */ 206 if (mat->was_assembled) { 207 if (!baij->colmap) { 208 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 209 } 210 #if defined(PETSC_USE_CTABLE) 211 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 212 col = col - 1; 213 #else 214 col = baij->colmap[in[j]/bs] - 1; 215 #endif 216 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 217 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 218 col = in[j]; 219 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 220 B = baij->B; 221 b = (Mat_SeqBAIJ*)(B)->data; 222 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 223 ba = b->a; 224 } else col += in[j]%bs; 225 } else col = in[j]; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 mat->assembled = PETSC_FALSE; 236 n_loc = 0; 237 for (j=0; j<n; j++) { 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray =baij->barray; 261 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if (!barray) { 269 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max 
%D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (im[i] > in[j]) { 287 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 288 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 289 } 290 /* If NumCol = 1 then a copy is not required */ 291 if ((roworiented) && (n == 1)) { 292 barray = (MatScalar*) v + i*bs2; 293 } else if ((!roworiented) && (m == 1)) { 294 barray = (MatScalar*) v + j*bs2; 295 } else { /* Here a copy is required */ 296 if (roworiented) { 297 value = v + i*(stepval+bs)*bs + j*bs; 298 } else { 299 value = v + j*(stepval+bs)*bs + i*bs; 300 } 301 for (ii=0; ii<bs; ii++,value+=stepval) { 302 for (jj=0; jj<bs; jj++) { 303 *barray++ = *value++; 304 } 305 } 306 barray -=bs2; 307 } 308 309 if (in[j] >= cstart && in[j] < cend) { 310 col = in[j] - cstart; 311 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 312 } else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined(PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined(PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row 
too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend) { 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined(PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data--; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 399 } 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatNorm_MPISBAIJ" 405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 406 { 407 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscReal sum[2],*lnorm2; 410 411 PetscFunctionBegin; 412 if (baij->size == 1) { 413 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 414 } else { 415 if (type == NORM_FROBENIUS) { 416 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 417 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 418 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 419 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 421 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 422 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 423 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 424 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 425 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 426 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 427 PetscReal *rsum,*rsum2,vabs; 428 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 429 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 430 MatScalar *v; 431 432 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 433 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 434 /* Amat */ 435 v = amat->a; jj = amat->j; 436 for (brow=0; brow<mbs; brow++) { 437 grow = bs*(rstart + brow); 438 nz = amat->i[brow+1] - amat->i[brow]; 439 for (bcol=0; bcol<nz; bcol++) { 440 gcol = bs*(rstart + *jj); jj++; 441 for (col=0; col<bs; col++) { 442 for (row=0; row<bs; row++) { 443 vabs = PetscAbsScalar(*v); v++; 444 rsum[gcol+col] += vabs; 445 /* non-diagonal block */ 446 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 /* Bmat */ 452 v = bmat->a; jj = bmat->j; 453 for (brow=0; brow<mbs; brow++) { 454 grow = bs*(rstart + brow); 455 nz = bmat->i[brow+1] - bmat->i[brow]; 456 for (bcol=0; bcol<nz; bcol++) { 457 gcol = bs*garray[*jj]; jj++; 458 for (col=0; col<bs; col++) { 459 for (row=0; row<bs; row++) { 460 vabs = PetscAbsScalar(*v); v++; 461 rsum[gcol+col] += vabs; 462 rsum[grow+row] += vabs; 463 } 464 } 465 } 466 } 467 ierr = 
MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 468 *norm = 0.0; 469 for (col=0; col<mat->cmap->N; col++) { 470 if (rsum2[col] > *norm) *norm = rsum2[col]; 471 } 472 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 473 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 483 PetscErrorCode ierr; 484 PetscInt nstash,reallocs; 485 InsertMode addv; 486 487 PetscFunctionBegin; 488 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 489 490 /* make sure all processors are either in INSERTMODE or ADDMODE */ 491 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 492 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 493 mat->insertmode = addv; /* in case this processor had no cache */ 494 495 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 496 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 497 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 498 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 499 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 500 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 507 { 508 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 509 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 510 PetscErrorCode ierr; 511 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 512 PetscInt *row,*col; 513 PetscBool other_disassembled; 514 PetscMPIInt n; 515 PetscBool r1,r2,r3; 516 MatScalar *val; 517 InsertMode addv = mat->insertmode; 518 519 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 520 PetscFunctionBegin; 521 if (!baij->donotstash && !mat->nooffprocentries) { 522 while (1) { 523 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 524 if (!flg) break; 525 526 for (i=0; i<n;) { 527 /* Now identify the consecutive vals belonging to the same row */ 528 for (j=i,rstart=row[j]; j<n; j++) { 529 if (row[j] != rstart) break; 530 } 531 if (j < n) ncols = j-i; 532 else ncols = n-i; 533 /* Now assemble all these values with a single function call */ 534 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 535 i = j; 536 } 537 } 538 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 539 /* Now process the block-stash. 
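   (The block-stash collects the entries that MatSetValuesBlocked_MPISBAIJ() stashed above for rows owned by other processes.)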
Since the values are stashed column-oriented, 540 set the roworiented flag to column oriented, and after MatSetValues() 541 restore the original flags */ 542 r1 = baij->roworiented; 543 r2 = a->roworiented; 544 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 545 546 baij->roworiented = PETSC_FALSE; 547 a->roworiented = PETSC_FALSE; 548 549 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */ 550 while (1) { 551 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 552 if (!flg) break; 553 554 for (i=0; i<n;) { 555 /* Now identify the consecutive vals belonging to the same row */ 556 for (j=i,rstart=row[j]; j<n; j++) { 557 if (row[j] != rstart) break; 558 } 559 if (j < n) ncols = j-i; 560 else ncols = n-i; 561 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 562 i = j; 563 } 564 } 565 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 566 567 baij->roworiented = r1; 568 a->roworiented = r2; 569 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled; if so we must 577 also disassemble ourselves, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 593 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 594 595 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 596 597 baij->rowvalues = 0; 598 599 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 600 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 601 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 602 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 609 #include <petscdraw.h> 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 613 { 614 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt bs = mat->rmap->bs; 617 PetscMPIInt rank = baij->rank; 618 PetscBool iascii,isdraw; 619 PetscViewer sviewer; 620 PetscViewerFormat format; 621 622 PetscFunctionBegin; 623 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 624 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 625 if (iascii) { 626 ierr =
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 627 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 628 MatInfo info; 629 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 630 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 633 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 634 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 635 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 636 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 637 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 638 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 639 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 640 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } else if (format == PETSC_VIEWER_ASCII_INFO) { 643 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 646 PetscFunctionReturn(0); 647 } 648 } 649 650 if (isdraw) { 651 PetscDraw draw; 652 PetscBool isnull; 653 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 654 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 655 } 656 657 { 658 /* assemble the entire matrix onto first processor. */ 659 Mat A; 660 Mat_SeqSBAIJ *Aloc; 661 Mat_SeqBAIJ *Bloc; 662 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 663 MatScalar *a; 664 const char *matname; 665 666 /* Should this be the same type as mat? 
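      (As coded below, the gathered copy is always created as MATMPISBAIJ, with the full global dimensions on rank 0 and zero local rows elsewhere; only rank 0 views its diagonal block through the singleton viewer.)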
*/ 667 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 668 if (!rank) { 669 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 670 } else { 671 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 672 } 673 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 674 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 675 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 676 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 677 678 /* copy over the A part */ 679 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 680 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 681 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 682 683 for (i=0; i<mbs; i++) { 684 rvals[0] = bs*(baij->rstartbs + i); 685 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 686 for (j=ai[i]; j<ai[i+1]; j++) { 687 col = (baij->cstartbs+aj[j])*bs; 688 for (k=0; k<bs; k++) { 689 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 690 col++; 691 a += bs; 692 } 693 } 694 } 695 /* copy over the B part */ 696 Bloc = (Mat_SeqBAIJ*)baij->B->data; 697 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 698 for (i=0; i<mbs; i++) { 699 700 rvals[0] = bs*(baij->rstartbs + i); 701 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 702 for (j=ai[i]; j<ai[i+1]; j++) { 703 col = baij->garray[aj[j]]*bs; 704 for (k=0; k<bs; k++) { 705 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 706 col++; 707 a += bs; 708 } 709 } 710 } 711 ierr = PetscFree(rvals);CHKERRQ(ierr); 712 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 713 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 714 /* 715 Everyone has to call to draw the matrix since the graphics waits are 716 synchronized across all processors that share the PetscDraw object 717 */ 718 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 719 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 720 if (!rank) { 721 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 722 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 723 } 724 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 725 ierr = MatDestroy(&A);CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatView_MPISBAIJ" 732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 733 { 734 PetscErrorCode ierr; 735 PetscBool iascii,isdraw,issocket,isbinary; 736 737 PetscFunctionBegin; 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 739 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 740 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 741 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 742 if (iascii || isdraw || issocket || isbinary) { 743 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatDestroy_MPISBAIJ" 750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 751 { 752 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 #if defined(PETSC_USE_LOG) 757 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 758 #endif 759 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 760 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 761 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 762 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 763 #if defined(PETSC_USE_CTABLE) 764 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 765 #else 766 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 767 #endif 768 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 770 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 771 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 772 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 773 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 774 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 776 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 777 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 778 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 779 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 780 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 781 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 782 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 783 #if defined(PETSC_USE_REAL_MAT_SINGLE) 784 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 785 #endif 786 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 788 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 789 ierr = PetscFree(mat->data);CHKERRQ(ierr); 790 791 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 796 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 797 #if defined(PETSC_HAVE_ELEMENTAL) 798 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 799 #endif 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 806 { 807 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 808 PetscErrorCode ierr; 809 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 810 PetscScalar *x,*from; 811 812 PetscFunctionBegin; 813 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 814 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 815 816 /* diagonal part */ 817 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 818 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 819 820 /* subdiagonal part */ 821 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 822 823 /* copy x into the vec slvec0 */ 824 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 827 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 828 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 829 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 830 831 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 /* supperdiagonal part */ 834 ierr = 
(*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMult_MPISBAIJ" 840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 841 { 842 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 843 PetscErrorCode ierr; 844 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 845 PetscScalar *from; 846 const PetscScalar *x; 847 848 PetscFunctionBegin; 849 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 850 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 851 852 /* diagonal part */ 853 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 854 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 855 856 /* subdiagonal part */ 857 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 858 859 /* copy x into the vec slvec0 */ 860 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 861 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 862 863 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 864 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 865 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 866 867 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 /* supperdiagonal part */ 870 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 876 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 877 { 878 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 879 PetscErrorCode ierr; 880 PetscInt nt; 881 882 PetscFunctionBegin; 883 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 884 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 885 886 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 887 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 888 889 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 /* do diagonal part */ 891 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 892 /* do supperdiagonal part */ 893 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 895 /* do subdiagonal part */ 896 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 897 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 898 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 904 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 905 { 906 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 907 PetscErrorCode ierr; 908 PetscInt mbs=a->mbs,bs=A->rmap->bs; 909 PetscScalar *from,zero=0.0; 910 const PetscScalar *x; 911 912 PetscFunctionBegin; 913 /* 914 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 915 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 916 */ 917 /* diagonal part */ 918 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 919 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 920 921 /* subdiagonal part */ 922 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 923 924 /* copy x into 
the vec slvec0 */ 925 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 926 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 927 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 928 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 929 930 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 932 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 933 934 /* supperdiagonal part */ 935 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 941 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 942 { 943 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 944 PetscErrorCode ierr; 945 946 PetscFunctionBegin; 947 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 /* do diagonal part */ 949 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 950 /* do supperdiagonal part */ 951 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 953 954 /* do subdiagonal part */ 955 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 956 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 957 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 /* 962 This only works correctly for square matrices where the subblock A->A is the 963 diagonal block 964 */ 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 967 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 968 { 969 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 974 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatScale_MPISBAIJ" 980 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 981 { 982 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 987 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatGetRow_MPISBAIJ" 993 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 994 { 995 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 996 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 997 PetscErrorCode ierr; 998 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 999 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1000 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1001 1002 PetscFunctionBegin; 1003 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1004 mat->getrowactive = PETSC_TRUE; 1005 1006 if (!mat->rowvalues && (idx || v)) { 1007 /* 1008 allocate enough space to hold information from the longest row. 
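      The longest block row is found by summing, for each block row, the number of block entries in the diagonal (A) and off-diagonal (B) parts; the work arrays below are then sized as max*bs2 scalar values and indices.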
1009 */ 1010 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1011 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1012 PetscInt max = 1,mbs = mat->mbs,tmp; 1013 for (i=0; i<mbs; i++) { 1014 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1015 if (max < tmp) max = tmp; 1016 } 1017 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1018 } 1019 1020 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1021 lrow = row - brstart; /* local row index */ 1022 1023 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1024 if (!v) {pvA = 0; pvB = 0;} 1025 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1026 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1027 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1028 nztot = nzA + nzB; 1029 1030 cmap = mat->garray; 1031 if (v || idx) { 1032 if (nztot) { 1033 /* Sort by increasing column numbers, assuming A and B already sorted */ 1034 PetscInt imark = -1; 1035 if (v) { 1036 *v = v_p = mat->rowvalues; 1037 for (i=0; i<nzB; i++) { 1038 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1039 else break; 1040 } 1041 imark = i; 1042 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1043 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1044 } 1045 if (idx) { 1046 *idx = idx_p = mat->rowindices; 1047 if (imark > -1) { 1048 for (i=0; i<imark; i++) { 1049 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1050 } 1051 } else { 1052 for (i=0; i<nzB; i++) { 1053 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1054 else break; 1055 } 1056 imark = i; 1057 } 1058 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1059 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1060 } 1061 } else { 1062 if (idx) *idx = 0; 1063 if (v) *v = 0; 1064 } 1065 } 1066 *nz = nztot; 1067 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1068 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1075 { 1076 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1077 1078 PetscFunctionBegin; 1079 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1080 baij->getrowactive = PETSC_FALSE; 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1086 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1087 { 1088 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1089 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1090 1091 PetscFunctionBegin; 1092 aA->getrow_utriangular = PETSC_TRUE; 1093 PetscFunctionReturn(0); 1094 } 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1097 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1098 { 1099 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1100 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1101 1102 PetscFunctionBegin; 1103 aA->getrow_utriangular = PETSC_FALSE; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1109 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1110 { 1111 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1116 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1122 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1123 { 1124 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1129 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1135 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1136 { 1137 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1142 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1148 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1149 { 1150 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1151 Mat A = a->A,B = a->B; 1152 PetscErrorCode ierr; 1153 PetscReal isend[5],irecv[5]; 1154 1155 PetscFunctionBegin; 1156 info->block_size = (PetscReal)matin->rmap->bs; 1157 1158 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1159 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 1163 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1164 1165 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1166 isend[3] += info->memory; isend[4] += info->mallocs; 1167 if (flag == MAT_LOCAL) { 1168 info->nz_used = isend[0]; 1169 info->nz_allocated = isend[1]; 1170 info->nz_unneeded = isend[2]; 1171 info->memory = isend[3]; 1172 info->mallocs = isend[4]; 1173 } else if (flag == MAT_GLOBAL_MAX) { 1174 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1175 1176 info->nz_used = irecv[0]; 1177 info->nz_allocated = irecv[1]; 1178 info->nz_unneeded = irecv[2]; 1179 info->memory = irecv[3]; 1180 info->mallocs = irecv[4]; 1181 } else if (flag == MAT_GLOBAL_SUM) { 1182 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1183 1184 info->nz_used = irecv[0]; 1185 info->nz_allocated = irecv[1]; 1186 info->nz_unneeded = irecv[2]; 1187 info->memory = irecv[3]; 1188 info->mallocs = irecv[4]; 1189 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1190 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1191 info->fill_ratio_needed = 0; 1192 info->factor_mallocs = 0; 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1198 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1199 { 1200 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1201 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NEW_NONZERO_LOCATIONS: 1207 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1208 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1209 case MAT_KEEP_NONZERO_PATTERN: 1210 case MAT_NEW_NONZERO_LOCATION_ERR: 1211 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1212 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1213 break; 1214 case MAT_ROW_ORIENTED: 1215 a->roworiented = flg; 1216 1217 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1218 ierr = 
MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1219 break; 1220 case MAT_NEW_DIAGONALS: 1221 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1222 break; 1223 case MAT_IGNORE_OFF_PROC_ENTRIES: 1224 a->donotstash = flg; 1225 break; 1226 case MAT_USE_HASH_TABLE: 1227 a->ht_flag = flg; 1228 break; 1229 case MAT_HERMITIAN: 1230 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1231 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1232 1233 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1234 break; 1235 case MAT_SPD: 1236 A->spd_set = PETSC_TRUE; 1237 A->spd = flg; 1238 if (flg) { 1239 A->symmetric = PETSC_TRUE; 1240 A->structurally_symmetric = PETSC_TRUE; 1241 A->symmetric_set = PETSC_TRUE; 1242 A->structurally_symmetric_set = PETSC_TRUE; 1243 } 1244 break; 1245 case MAT_SYMMETRIC: 1246 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1247 break; 1248 case MAT_STRUCTURALLY_SYMMETRIC: 1249 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1250 break; 1251 case MAT_SYMMETRY_ETERNAL: 1252 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1253 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1254 break; 1255 case MAT_IGNORE_LOWER_TRIANGULAR: 1256 aA->ignore_ltriangular = flg; 1257 break; 1258 case MAT_ERROR_LOWER_TRIANGULAR: 1259 aA->ignore_ltriangular = flg; 1260 break; 1261 case MAT_GETROW_UPPERTRIANGULAR: 1262 aA->getrow_utriangular = flg; 1263 break; 1264 default: 1265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1272 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1273 { 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 if (MAT_INITIAL_MATRIX || *B != A) { 1278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1285 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1286 { 1287 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1288 Mat a = baij->A, b=baij->B; 1289 PetscErrorCode ierr; 1290 PetscInt nv,m,n; 1291 PetscBool flg; 1292 1293 PetscFunctionBegin; 1294 if (ll != rr) { 1295 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1296 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1297 } 1298 if (!ll) PetscFunctionReturn(0); 1299 1300 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1301 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1302 1303 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1304 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1305 1306 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 1308 /* left diagonalscale the off-diagonal part */ 1309 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1310 1311 /* scale the diagonal part */ 1312 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1313 1314 /* right diagonalscale the off-diagonal part */ 1315 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1316 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ 
"MatSetUnfactored_MPISBAIJ" 1322 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1323 { 1324 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1333 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatEqual_MPISBAIJ" 1336 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1337 { 1338 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1339 Mat a,b,c,d; 1340 PetscBool flg; 1341 PetscErrorCode ierr; 1342 1343 PetscFunctionBegin; 1344 a = matA->A; b = matA->B; 1345 c = matB->A; d = matB->B; 1346 1347 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1348 if (flg) { 1349 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1350 } 1351 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatCopy_MPISBAIJ" 1357 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1358 { 1359 PetscErrorCode ierr; 1360 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1361 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1362 1363 PetscFunctionBegin; 1364 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1365 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1366 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1367 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1368 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1369 } else { 1370 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1371 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1372 } 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1378 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1379 { 1380 PetscErrorCode ierr; 1381 1382 PetscFunctionBegin; 1383 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1389 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1390 { 1391 PetscErrorCode ierr; 1392 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1393 PetscBLASInt bnz,one=1; 1394 Mat_SeqSBAIJ *xa,*ya; 1395 Mat_SeqBAIJ *xb,*yb; 1396 1397 PetscFunctionBegin; 1398 if (str == SAME_NONZERO_PATTERN) { 1399 PetscScalar alpha = a; 1400 xa = (Mat_SeqSBAIJ*)xx->A->data; 1401 ya = (Mat_SeqSBAIJ*)yy->A->data; 1402 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1403 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1404 xb = (Mat_SeqBAIJ*)xx->B->data; 1405 yb = (Mat_SeqBAIJ*)yy->B->data; 1406 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1407 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1408 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1409 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1410 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1411 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1412 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1413 } else { 1414 Mat B; 1415 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1416 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1417 ierr = 
MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1418 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1419 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1420 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1421 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1422 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1423 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1424 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1425 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1426 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1427 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1428 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1429 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1430 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1431 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1432 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1433 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1434 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1441 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1442 { 1443 PetscErrorCode ierr; 1444 PetscInt i; 1445 PetscBool flg; 1446 1447 PetscFunctionBegin; 1448 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1449 for (i=0; i<n; i++) { 1450 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1451 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1452 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1453 } 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 /* -------------------------------------------------------------------*/ 1459 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1460 MatGetRow_MPISBAIJ, 1461 MatRestoreRow_MPISBAIJ, 1462 MatMult_MPISBAIJ, 1463 /* 4*/ MatMultAdd_MPISBAIJ, 1464 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1465 MatMultAdd_MPISBAIJ, 1466 0, 1467 0, 1468 0, 1469 /* 10*/ 0, 1470 0, 1471 0, 1472 MatSOR_MPISBAIJ, 1473 MatTranspose_MPISBAIJ, 1474 /* 15*/ MatGetInfo_MPISBAIJ, 1475 MatEqual_MPISBAIJ, 1476 MatGetDiagonal_MPISBAIJ, 1477 MatDiagonalScale_MPISBAIJ, 1478 MatNorm_MPISBAIJ, 1479 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1480 MatAssemblyEnd_MPISBAIJ, 1481 MatSetOption_MPISBAIJ, 1482 MatZeroEntries_MPISBAIJ, 1483 /* 24*/ 0, 1484 0, 1485 0, 1486 0, 1487 0, 1488 /* 29*/ MatSetUp_MPISBAIJ, 1489 0, 1490 0, 1491 0, 1492 0, 1493 /* 34*/ MatDuplicate_MPISBAIJ, 1494 0, 1495 0, 1496 0, 1497 0, 1498 /* 39*/ MatAXPY_MPISBAIJ, 1499 MatGetSubMatrices_MPISBAIJ, 1500 MatIncreaseOverlap_MPISBAIJ, 1501 MatGetValues_MPISBAIJ, 1502 MatCopy_MPISBAIJ, 1503 /* 44*/ 0, 1504 MatScale_MPISBAIJ, 1505 0, 1506 0, 1507 0, 1508 /* 49*/ 0, 1509 0, 1510 0, 1511 0, 1512 0, 1513 /* 54*/ 0, 1514 0, 1515 MatSetUnfactored_MPISBAIJ, 1516 0, 1517 MatSetValuesBlocked_MPISBAIJ, 1518 /* 59*/ 0, 1519 0, 1520 0, 1521 0, 1522 0, 1523 /* 64*/ 0, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /* 74*/ 0, 1534 0, 1535 0, 1536 0, 1537 0, 1538 /* 79*/ 0, 1539 0, 1540 0, 1541 0, 1542 MatLoad_MPISBAIJ, 1543 /* 84*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /* 89*/ 0, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /* 94*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /* 
99*/ 0, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*104*/ 0, 1564 MatRealPart_MPISBAIJ, 1565 MatImaginaryPart_MPISBAIJ, 1566 MatGetRowUpperTriangular_MPISBAIJ, 1567 MatRestoreRowUpperTriangular_MPISBAIJ, 1568 /*109*/ 0, 1569 0, 1570 0, 1571 0, 1572 0, 1573 /*114*/ 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*119*/ 0, 1579 0, 1580 0, 1581 0, 1582 0, 1583 /*124*/ 0, 1584 0, 1585 0, 1586 0, 1587 0, 1588 /*129*/ 0, 1589 0, 1590 0, 1591 0, 1592 0, 1593 /*134*/ 0, 1594 0, 1595 0, 1596 0, 1597 0, 1598 /*139*/ 0, 1599 0, 1600 0, 1601 0, 1602 0, 1603 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1604 }; 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1608 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1609 { 1610 PetscFunctionBegin; 1611 *a = ((Mat_MPISBAIJ*)A->data)->A; 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1617 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1618 { 1619 Mat_MPISBAIJ *b; 1620 PetscErrorCode ierr; 1621 PetscInt i,mbs,Mbs; 1622 1623 PetscFunctionBegin; 1624 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1625 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1626 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1627 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1628 1629 b = (Mat_MPISBAIJ*)B->data; 1630 mbs = B->rmap->n/bs; 1631 Mbs = B->rmap->N/bs; 1632 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1633 1634 B->rmap->bs = bs; 1635 b->bs2 = bs*bs; 1636 b->mbs = mbs; 1637 b->Mbs = Mbs; 1638 b->nbs = B->cmap->n/bs; 1639 b->Nbs = B->cmap->N/bs; 1640 1641 for (i=0; i<=b->size; i++) { 1642 b->rangebs[i] = B->rmap->range[i]/bs; 1643 } 1644 b->rstartbs = B->rmap->rstart/bs; 1645 b->rendbs = B->rmap->rend/bs; 1646 1647 b->cstartbs = B->cmap->rstart/bs; 1648 b->cendbs = B->cmap->rend/bs; 1649 1650 if (!B->preallocated) { 1651 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1652 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1653 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1654 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1655 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1656 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1657 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1658 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1659 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1660 } 1661 1662 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1663 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1664 1665 B->preallocated = PETSC_TRUE; 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1671 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1672 { 1673 PetscInt m,rstart,cstart,cend; 1674 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1675 const PetscInt *JJ =0; 1676 PetscScalar *values=0; 1677 PetscErrorCode ierr; 1678 1679 PetscFunctionBegin; 1680 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be 
positive but it is %D",bs); 1681 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1682 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1683 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1684 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1685 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1686 m = B->rmap->n/bs; 1687 rstart = B->rmap->rstart/bs; 1688 cstart = B->cmap->rstart/bs; 1689 cend = B->cmap->rend/bs; 1690 1691 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1692 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1693 for (i=0; i<m; i++) { 1694 nz = ii[i+1] - ii[i]; 1695 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1696 nz_max = PetscMax(nz_max,nz); 1697 JJ = jj + ii[i]; 1698 for (j=0; j<nz; j++) { 1699 if (*JJ >= cstart) break; 1700 JJ++; 1701 } 1702 d = 0; 1703 for (; j<nz; j++) { 1704 if (*JJ++ >= cend) break; 1705 d++; 1706 } 1707 d_nnz[i] = d; 1708 o_nnz[i] = nz - d; 1709 } 1710 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1711 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1712 1713 values = (PetscScalar*)V; 1714 if (!values) { 1715 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1716 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1717 } 1718 for (i=0; i<m; i++) { 1719 PetscInt row = i + rstart; 1720 PetscInt ncols = ii[i+1] - ii[i]; 1721 const PetscInt *icols = jj + ii[i]; 1722 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1723 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1724 } 1725 1726 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1727 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1728 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1729 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 /*MC 1734 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1735 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1736 the matrix is stored. 1737 1738 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1739 can call MatSetOption(Mat, MAT_HERMITIAN); 1740 1741 Options Database Keys: 1742 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1743 1744 Level: beginner 1745 1746 .seealso: MatCreateMPISBAIJ 1747 M*/ 1748 1749 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1750 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatCreate_MPISBAIJ" 1753 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1754 { 1755 Mat_MPISBAIJ *b; 1756 PetscErrorCode ierr; 1757 PetscBool flg = PETSC_FALSE; 1758 1759 PetscFunctionBegin; 1760 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1761 B->data = (void*)b; 1762 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1763 1764 B->ops->destroy = MatDestroy_MPISBAIJ; 1765 B->ops->view = MatView_MPISBAIJ; 1766 B->assembled = PETSC_FALSE; 1767 B->insertmode = NOT_SET_VALUES; 1768 1769 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1770 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1771 1772 /* build local table of row and column ownerships */ 1773 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1774 1775 /* build cache for off array entries formed */ 1776 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1777 1778 b->donotstash = PETSC_FALSE; 1779 b->colmap = NULL; 1780 b->garray = NULL; 1781 b->roworiented = PETSC_TRUE; 1782 1783 /* stuff used in block assembly */ 1784 b->barray = 0; 1785 1786 /* stuff used for matrix vector multiply */ 1787 b->lvec = 0; 1788 b->Mvctx = 0; 1789 b->slvec0 = 0; 1790 b->slvec0b = 0; 1791 b->slvec1 = 0; 1792 b->slvec1a = 0; 1793 b->slvec1b = 0; 1794 b->sMvctx = 0; 1795 1796 /* stuff for MatGetRow() */ 1797 b->rowindices = 0; 1798 b->rowvalues = 0; 1799 b->getrowactive = PETSC_FALSE; 1800 1801 /* hash table stuff */ 1802 b->ht = 0; 1803 b->hd = 0; 1804 b->ht_size = 0; 1805 b->ht_flag = PETSC_FALSE; 1806 b->ht_fact = 0; 1807 b->ht_total_ct = 0; 1808 b->ht_insert_ct = 0; 1809 1810 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1811 b->ijonly = PETSC_FALSE; 1812 1813 b->in_loc = 0; 1814 b->v_loc = 0; 1815 b->n_loc = 0; 1816 1817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1818 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1820 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1821 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1822 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1823 #if defined(PETSC_HAVE_ELEMENTAL) 1824 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1825 #endif 1826 1827 B->symmetric = PETSC_TRUE; 1828 B->structurally_symmetric = PETSC_TRUE; 1829 B->symmetric_set = PETSC_TRUE; 1830 B->structurally_symmetric_set = PETSC_TRUE; 1831 1832 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1833 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1834 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 1835 if (flg) { 1836 PetscReal fact = 1.39; 1837 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1838 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1839 if (fact <= 1.0) fact = 1.39; 1840 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1841 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1842 } 1843 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 1847 /*MC 1848 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1849 1850 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1851 and MATMPISBAIJ otherwise. 1852 1853 Options Database Keys: 1854 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1855 1856 Level: beginner 1857 1858 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1859 M*/ 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1863 /*@C 1864 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1865 the user should preallocate the matrix storage by setting the parameters 1866 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1867 performance can be increased by more than a factor of 50. 1868 1869 Collective on Mat 1870 1871 Input Parameters: 1872 + B - the matrix 1873 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1874 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1875 . d_nz - number of block nonzeros per block row in diagonal portion of local 1876 submatrix (same for all local rows) 1877 . d_nnz - array containing the number of block nonzeros in the various block rows 1878 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 1879 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1880 for the diagonal entry and set a value even if it is zero. 1881 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1882 submatrix (same for all local rows). 1883 - o_nnz - array containing the number of nonzeros in the various block rows of the 1884 off-diagonal portion of the local submatrix that is to the right of the diagonal 1885 (possibly different for each block row) or NULL. 1886 1887 1888 Options Database Keys: 1889 . -mat_no_unroll - uses code that does not unroll the loops in the 1890 block calculations (much slower) 1891 . -mat_block_size - size of the blocks to use 1892 1893 Notes: 1894 1895 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1896 then it must be used on all processors that share the object for that argument. 1897 1898 If the *_nnz parameter is given then the *_nz parameter is ignored 1899 1900 Storage Information: 1901 For a square global matrix we define each processor's diagonal portion 1902 to be its local rows and the corresponding columns (a square submatrix); 1903 each processor's off-diagonal portion encompasses the remainder of the 1904 local matrix (a rectangular submatrix). 
1905 1906 The user can specify preallocated storage for the diagonal part of 1907 the local submatrix with either d_nz or d_nnz (not both). Set 1908 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1909 memory allocation. Likewise, specify preallocated storage for the 1910 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1911 1912 You can call MatGetInfo() to get information on how effective the preallocation was; 1913 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1914 You can also run with the option -info and look for messages with the string 1915 malloc in them to see if additional memory allocation was needed. 1916 1917 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1918 the figure below we depict these three local rows and all columns (0-11). 1919 1920 .vb 1921 0 1 2 3 4 5 6 7 8 9 10 11 1922 -------------------------- 1923 row 3 |. . . d d d o o o o o o 1924 row 4 |. . . d d d o o o o o o 1925 row 5 |. . . d d d o o o o o o 1926 -------------------------- 1927 .ve 1928 1929 Thus, any entries in the d locations are stored in the d (diagonal) 1930 submatrix, and any entries in the o locations are stored in the 1931 o (off-diagonal) submatrix. Note that the d matrix is stored in 1932 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1933 1934 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1935 plus the diagonal part of the d matrix, 1936 and o_nz should indicate the number of block nonzeros per row in the o matrix 1937 1938 In general, for PDE problems in which most nonzeros are near the diagonal, 1939 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1940 or you will get TERRIBLE performance; see the users' manual chapter on 1941 matrices. 1942 1943 Level: intermediate 1944 1945 .keywords: matrix, block, aij, compressed row, sparse, parallel 1946 1947 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 1948 @*/ 1949 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1950 { 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1955 PetscValidType(B,1); 1956 PetscValidLogicalCollectiveInt(B,bs,2); 1957 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatCreateSBAIJ" 1963 /*@C 1964 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1965 (block compressed row). For good matrix assembly performance 1966 the user should preallocate the matrix storage by setting the parameters 1967 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1968 performance can be increased by more than a factor of 50. 1969 1970 Collective on MPI_Comm 1971 1972 Input Parameters: 1973 + comm - MPI communicator 1974 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1975 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1976 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1977 This value should be the same as the local size used in creating the 1978 y vector for the matrix-vector product y = Ax. 1979 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1980 This value should be the same as the local size used in creating the 1981 x vector for the matrix-vector product y = Ax. 1982 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1983 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1984 . d_nz - number of block nonzeros per block row in diagonal portion of local 1985 submatrix (same for all local rows) 1986 . d_nnz - array containing the number of block nonzeros in the various block rows 1987 in the upper triangular portion of the diagonal portion of the local submatrix 1988 (possibly different for each block row) or NULL. 1989 If you plan to factor the matrix you must leave room for the diagonal entry and 1990 set its value even if it is zero. 1991 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1992 submatrix (same for all local rows). 1993 - o_nnz - array containing the number of nonzeros in the various block rows of the 1994 off-diagonal portion of the local submatrix (possibly different for 1995 each block row) or NULL. 1996 1997 Output Parameter: 1998 . A - the matrix 1999 2000 Options Database Keys: 2001 . -mat_no_unroll - uses code that does not unroll the loops in the 2002 block calculations (much slower) 2003 . -mat_block_size - size of the blocks to use 2004 . -mat_mpi - use the parallel matrix data structures even on one processor 2005 (defaults to using SeqSBAIJ format on one processor) 2006 2007 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2008 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2009 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2010 2011 Notes: 2012 The number of rows and columns must be divisible by blocksize. 2013 This matrix type does not support complex Hermitian operations. 2014 2015 The user MUST specify either the local or global matrix dimensions 2016 (possibly both). 2017 2018 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2019 then it must be used on all processors that share the object for that argument. 2020 2021 If the *_nnz parameter is given then the *_nz parameter is ignored 2022 2023 Storage Information: 2024 For a square global matrix we define each processor's diagonal portion 2025 to be its local rows and the corresponding columns (a square submatrix); 2026 each processor's off-diagonal portion encompasses the remainder of the 2027 local matrix (a rectangular submatrix). 2028 2029 The user can specify preallocated storage for the diagonal part of 2030 the local submatrix with either d_nz or d_nnz (not both). Set 2031 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2032 memory allocation. Likewise, specify preallocated storage for the 2033 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2034 2035 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2036 the figure below we depict these three local rows and all columns (0-11). 2037 2038 .vb 2039 0 1 2 3 4 5 6 7 8 9 10 11 2040 -------------------------- 2041 row 3 |. . . d d d o o o o o o 2042 row 4 |. . . d d d o o o o o o 2043 row 5 |. . . 
d d d o o o o o o 2044 -------------------------- 2045 .ve 2046 2047 Thus, any entries in the d locations are stored in the d (diagonal) 2048 submatrix, and any entries in the o locations are stored in the 2049 o (off-diagonal) submatrix. Note that the d matrix is stored in 2050 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2051 2052 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2053 plus the diagonal part of the d matrix, 2054 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2055 In general, for PDE problems in which most nonzeros are near the diagonal, 2056 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2057 or you will get TERRIBLE performance; see the users' manual chapter on 2058 matrices. 2059 2060 Level: intermediate 2061 2062 .keywords: matrix, block, aij, compressed row, sparse, parallel 2063 2064 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2065 @*/ 2066 2067 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2068 { 2069 PetscErrorCode ierr; 2070 PetscMPIInt size; 2071 2072 PetscFunctionBegin; 2073 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2074 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2075 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2076 if (size > 1) { 2077 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2078 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2079 } else { 2080 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2081 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2082 } 2083 PetscFunctionReturn(0); 2084 } 2085 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2089 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2090 { 2091 Mat mat; 2092 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2093 PetscErrorCode ierr; 2094 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2095 PetscScalar *array; 2096 2097 PetscFunctionBegin; 2098 *newmat = 0; 2099 2100 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2101 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2102 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2103 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2104 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2105 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2106 2107 mat->factortype = matin->factortype; 2108 mat->preallocated = PETSC_TRUE; 2109 mat->assembled = PETSC_TRUE; 2110 mat->insertmode = NOT_SET_VALUES; 2111 2112 a = (Mat_MPISBAIJ*)mat->data; 2113 a->bs2 = oldmat->bs2; 2114 a->mbs = oldmat->mbs; 2115 a->nbs = oldmat->nbs; 2116 a->Mbs = oldmat->Mbs; 2117 a->Nbs = oldmat->Nbs; 2118 2119 2120 a->size = oldmat->size; 2121 a->rank = oldmat->rank; 2122 a->donotstash = oldmat->donotstash; 2123 a->roworiented = oldmat->roworiented; 2124 a->rowindices = 0; 2125 a->rowvalues = 0; 2126 a->getrowactive = PETSC_FALSE; 2127 a->barray = 0; 2128 a->rstartbs = oldmat->rstartbs; 2129 a->rendbs = oldmat->rendbs; 2130 a->cstartbs = oldmat->cstartbs; 2131 a->cendbs = oldmat->cendbs; 2132 2133 /* hash table stuff */ 2134 a->ht = 0; 2135 a->hd = 0; 2136 a->ht_size = 0; 2137 a->ht_flag = oldmat->ht_flag; 2138 a->ht_fact 
= oldmat->ht_fact; 2139 a->ht_total_ct = 0; 2140 a->ht_insert_ct = 0; 2141 2142 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2143 if (oldmat->colmap) { 2144 #if defined(PETSC_USE_CTABLE) 2145 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2146 #else 2147 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2148 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2149 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2150 #endif 2151 } else a->colmap = 0; 2152 2153 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2154 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2155 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2156 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2157 } else a->garray = 0; 2158 2159 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2160 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2161 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2162 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2163 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2164 2165 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2166 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2167 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2168 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2169 2170 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2171 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2172 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2173 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2174 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2175 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2176 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2177 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2178 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2179 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2180 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2181 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2182 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2183 2184 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2185 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2186 a->sMvctx = oldmat->sMvctx; 2187 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2188 2189 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2190 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2191 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2192 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2193 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2194 *newmat = mat; 2195 PetscFunctionReturn(0); 2196 } 2197 2198 #undef __FUNCT__ 2199 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2200 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2201 { 2202 PetscErrorCode ierr; 2203 PetscInt i,nz,j,rstart,rend; 2204 PetscScalar *vals,*buf; 2205 MPI_Comm comm; 2206 MPI_Status status; 2207 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2208 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2209 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2210 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2211 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2212 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2213 int fd; 2214 2215 PetscFunctionBegin; 2216 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2217 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2218 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2219 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2220 if (bs < 0) bs = 1; 2221 2222 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2223 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2224 if (!rank) { 2225 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2226 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2227 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2228 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2229 } 2230 2231 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2232 2233 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2234 M = header[1]; 2235 N = header[2]; 2236 2237 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2238 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2239 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2240 2241 /* If global sizes are set, check if they are consistent with that given in the file */ 2242 if (sizesset) { 2243 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2244 } 2245 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2246 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2247 2248 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2249 2250 /* 2251 This code adds extra rows to make sure the number of rows is 2252 divisible by the blocksize 2253 */ 2254 Mbs = M/bs; 2255 extra_rows = bs - M + bs*(Mbs); 2256 if (extra_rows == bs) extra_rows = 0; 2257 else Mbs++; 2258 if (extra_rows &&!rank) { 2259 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2260 } 2261 2262 /* determine ownership of all rows */ 2263 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2264 mbs = Mbs/size + ((Mbs % size) > rank); 2265 m = mbs*bs; 2266 } else { /* User Set */ 2267 m = newmat->rmap->n; 2268 mbs = m/bs; 2269 } 2270 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2271 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2272 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2273 rowners[0] = 0; 2274 for (i=2; i<=size; i++) 
rowners[i] += rowners[i-1]; 2275 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2276 rstart = rowners[rank]; 2277 rend = rowners[rank+1]; 2278 2279 /* distribute row lengths to all processors */ 2280 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2281 if (!rank) { 2282 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2283 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2284 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2285 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2286 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2287 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2288 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2289 } else { 2290 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2291 } 2292 2293 if (!rank) { /* procs[0] */ 2294 /* calculate the number of nonzeros on each processor */ 2295 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2296 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2297 for (i=0; i<size; i++) { 2298 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2299 procsnz[i] += rowlengths[j]; 2300 } 2301 } 2302 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2303 2304 /* determine max buffer needed and allocate it */ 2305 maxnz = 0; 2306 for (i=0; i<size; i++) { 2307 maxnz = PetscMax(maxnz,procsnz[i]); 2308 } 2309 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2310 2311 /* read in my part of the matrix column indices */ 2312 nz = procsnz[0]; 2313 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2314 mycols = ibuf; 2315 if (size == 1) nz -= extra_rows; 2316 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2317 if (size == 1) { 2318 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2319 } 2320 2321 /* read in every ones (except the last) and ship off */ 2322 for (i=1; i<size-1; i++) { 2323 nz = procsnz[i]; 2324 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2325 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2326 } 2327 /* read in the stuff for the last proc */ 2328 if (size != 1) { 2329 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2330 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2331 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2332 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2333 } 2334 ierr = PetscFree(cols);CHKERRQ(ierr); 2335 } else { /* procs[i], i>0 */ 2336 /* determine buffer space needed for message */ 2337 nz = 0; 2338 for (i=0; i<m; i++) nz += locrowlens[i]; 2339 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2340 mycols = ibuf; 2341 /* receive message of column indices*/ 2342 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2343 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2344 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2345 } 2346 2347 /* loop over local rows, determining number of off diagonal entries */ 2348 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2349 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2350 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2351 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2352 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2353 rowcount = 0; 2354 nzcount = 0; 2355 for (i=0; i<mbs; i++) { 2356 dcount = 0; 
2357 odcount = 0; 2358 for (j=0; j<bs; j++) { 2359 kmax = locrowlens[rowcount]; 2360 for (k=0; k<kmax; k++) { 2361 tmp = mycols[nzcount++]/bs; /* block col. index */ 2362 if (!mask[tmp]) { 2363 mask[tmp] = 1; 2364 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2365 else masked1[dcount++] = tmp; /* entry in diag portion */ 2366 } 2367 } 2368 rowcount++; 2369 } 2370 2371 dlens[i] = dcount; /* d_nzz[i] */ 2372 odlens[i] = odcount; /* o_nzz[i] */ 2373 2374 /* zero out the mask elements we set */ 2375 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2376 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2377 } 2378 if (!sizesset) { 2379 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2380 } 2381 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2382 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2383 2384 if (!rank) { 2385 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2386 /* read in my part of the matrix numerical values */ 2387 nz = procsnz[0]; 2388 vals = buf; 2389 mycols = ibuf; 2390 if (size == 1) nz -= extra_rows; 2391 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2392 if (size == 1) { 2393 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2394 } 2395 2396 /* insert into matrix */ 2397 jj = rstart*bs; 2398 for (i=0; i<m; i++) { 2399 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2400 mycols += locrowlens[i]; 2401 vals += locrowlens[i]; 2402 jj++; 2403 } 2404 2405 /* read in other processors (except the last one) and ship out */ 2406 for (i=1; i<size-1; i++) { 2407 nz = procsnz[i]; 2408 vals = buf; 2409 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2410 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2411 } 2412 /* the last proc */ 2413 if (size != 1) { 2414 nz = procsnz[i] - extra_rows; 2415 vals = buf; 2416 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2417 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2418 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2419 } 2420 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2421 2422 } else { 2423 /* receive numeric values */ 2424 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2425 2426 /* receive message of values*/ 2427 vals = buf; 2428 mycols = ibuf; 2429 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2430 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2431 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2432 2433 /* insert into matrix */ 2434 jj = rstart*bs; 2435 for (i=0; i<m; i++) { 2436 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2437 mycols += locrowlens[i]; 2438 vals += locrowlens[i]; 2439 jj++; 2440 } 2441 } 2442 2443 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2444 ierr = PetscFree(buf);CHKERRQ(ierr); 2445 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2446 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2447 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2448 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2449 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2450 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2456 /*XXXXX@ 
2457 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2458 2459 Input Parameters: 2460 . mat - the matrix 2461 . fact - factor 2462 2463 Not Collective on Mat, each process can have a different hash factor 2464 2465 Level: advanced 2466 2467 Notes: 2468 This can also be set by the command line option: -mat_use_hash_table fact 2469 2470 .keywords: matrix, hashtable, factor, HT 2471 2472 .seealso: MatSetOption() 2473 @XXXXX*/ 2474 2475 2476 #undef __FUNCT__ 2477 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2478 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2479 { 2480 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2481 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2482 PetscReal atmp; 2483 PetscReal *work,*svalues,*rvalues; 2484 PetscErrorCode ierr; 2485 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2486 PetscMPIInt rank,size; 2487 PetscInt *rowners_bs,dest,count,source; 2488 PetscScalar *va; 2489 MatScalar *ba; 2490 MPI_Status stat; 2491 2492 PetscFunctionBegin; 2493 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2494 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2495 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2496 2497 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2498 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2499 2500 bs = A->rmap->bs; 2501 mbs = a->mbs; 2502 Mbs = a->Mbs; 2503 ba = b->a; 2504 bi = b->i; 2505 bj = b->j; 2506 2507 /* find ownerships */ 2508 rowners_bs = A->rmap->range; 2509 2510 /* each proc creates an array to be distributed */ 2511 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2512 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2513 2514 /* row_max for B */ 2515 if (rank != size-1) { 2516 for (i=0; i<mbs; i++) { 2517 ncols = bi[1] - bi[0]; bi++; 2518 brow = bs*i; 2519 for (j=0; j<ncols; j++) { 2520 bcol = bs*(*bj); 2521 for (kcol=0; kcol<bs; kcol++) { 2522 col = bcol + kcol; /* local col index */ 2523 col += rowners_bs[rank+1]; /* global col index */ 2524 for (krow=0; krow<bs; krow++) { 2525 atmp = PetscAbsScalar(*ba); ba++; 2526 row = brow + krow; /* local row index */ 2527 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2528 if (work[col] < atmp) work[col] = atmp; 2529 } 2530 } 2531 bj++; 2532 } 2533 } 2534 2535 /* send values to its owners */ 2536 for (dest=rank+1; dest<size; dest++) { 2537 svalues = work + rowners_bs[dest]; 2538 count = rowners_bs[dest+1]-rowners_bs[dest]; 2539 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2540 } 2541 } 2542 2543 /* receive values */ 2544 if (rank) { 2545 rvalues = work; 2546 count = rowners_bs[rank+1]-rowners_bs[rank]; 2547 for (source=0; source<rank; source++) { 2548 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2549 /* process values */ 2550 for (i=0; i<count; i++) { 2551 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2552 } 2553 } 2554 } 2555 2556 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2557 ierr = PetscFree(work);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatSOR_MPISBAIJ" 2563 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2564 { 2565 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2566 PetscErrorCode ierr; 2567 PetscInt 
mbs=mat->mbs,bs=matin->rmap->bs; 2568 PetscScalar *x,*ptr,*from; 2569 Vec bb1; 2570 const PetscScalar *b; 2571 2572 PetscFunctionBegin; 2573 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2574 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2575 2576 if (flag == SOR_APPLY_UPPER) { 2577 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2582 if (flag & SOR_ZERO_INITIAL_GUESS) { 2583 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2584 its--; 2585 } 2586 2587 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2588 while (its--) { 2589 2590 /* lower triangular part: slvec0b = - B^T*xx */ 2591 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2592 2593 /* copy xx into slvec0a */ 2594 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2595 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2596 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2597 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2598 2599 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2600 2601 /* copy bb into slvec1a */ 2602 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2603 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2604 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2605 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2606 2607 /* set slvec1b = 0 */ 2608 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2609 2610 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2611 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2612 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2613 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2614 2615 /* upper triangular part: bb1 = bb1 - B*x */ 2616 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2617 2618 /* local diagonal sweep */ 2619 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2620 } 2621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2622 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2624 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2625 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2626 } else if (flag & SOR_EISENSTAT) { 2627 Vec xx1; 2628 PetscBool hasop; 2629 const PetscScalar *diag; 2630 PetscScalar *sl,scale = (omega - 2.0)/omega; 2631 PetscInt i,n; 2632 2633 if (!mat->xx1) { 2634 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2635 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2636 } 2637 xx1 = mat->xx1; 2638 bb1 = mat->bb1; 2639 2640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2641 2642 if (!mat->diag) { 2643 /* this is wrong for same matrix with new nonzero values */ 2644 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2645 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2646 } 2647 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2648 2649 
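      /* Apply the block diagonal of the matrix to xx: use the matrix's MatMultDiagonalBlock() when it is available (checked just above), otherwise fall back to the explicit pointwise loop below that uses the cached diagonal vector mat->diag */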
if (hasop) { 2650 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2651 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2652 } else { 2653 /* 2654 These two lines are replaced by code that may be a bit faster for a good compiler 2655 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2656 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2657 */ 2658 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2659 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2660 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2661 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2662 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2663 if (omega == 1.0) { 2664 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2665 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2666 } else { 2667 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2668 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2669 } 2670 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2671 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2673 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2674 } 2675 2676 /* multiply off-diagonal portion of matrix */ 2677 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2678 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2679 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2680 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2681 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2682 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2683 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2684 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2685 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2686 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2687 2688 /* local sweep */ 2689 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2691 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2697 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2698 { 2699 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2700 PetscErrorCode ierr; 2701 Vec lvec1,bb1; 2702 2703 PetscFunctionBegin; 2704 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2705 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2706 2707 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2708 if (flag & SOR_ZERO_INITIAL_GUESS) { 2709 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2710 its--; 2711 } 2712 2713 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2714 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2715 while (its--) { 2716 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 2718 /* lower diagonal part: bb1 = bb - B^T*xx */ 2719 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2720 ierr = 
VecScale(lvec1,-1.0);CHKERRQ(ierr); 2721 2722 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2723 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2724 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2725 2726 /* upper diagonal part: bb1 = bb1 - B*x */ 2727 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2728 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2729 2730 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2731 2732 /* diagonal sweep */ 2733 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2734 } 2735 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2736 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2737 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2738 PetscFunctionReturn(0); 2739 } 2740 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2743 /*@ 2744 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in standard 2745 CSR format. 2746 2747 Collective on MPI_Comm 2748 2749 Input Parameters: 2750 + comm - MPI communicator 2751 . bs - the block size, only a block size of 1 is supported 2752 . m - number of local rows (Cannot be PETSC_DECIDE) 2753 . n - This value should be the same as the local size used in creating the 2754 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2755 calculated if N is given) For square matrices n is almost always m. 2756 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2757 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2758 . i - row indices 2759 . j - column indices 2760 - a - matrix values 2761 2762 Output Parameter: 2763 . mat - the matrix 2764 2765 Level: intermediate 2766 2767 Notes: 2768 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2769 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2770 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2771 2772 The i and j indices are zero-based, and the i array holds the offsets into the local j array for the start of each row.
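       Example (an illustrative sketch only; the row counts, column indices and values below are
       hypothetical, shown for the process that owns the first two global rows of a bs=1 matrix):
.vb
       PetscErrorCode ierr;
       PetscInt       i[3] = {0,2,3};
       PetscInt       j[3] = {0,5,1};
       PetscScalar    a[3] = {2.0,-1.0,3.0};
       Mat            A;
       ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
       Here local row 0 has entries in global columns 0 and 5 and local row 1 has a single diagonal
       entry; only entries on or above the diagonal are kept by the SBAIJ format.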
2773 2774 .keywords: matrix, aij, compressed row, sparse, parallel 2775 2776 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2777 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2778 @*/ 2779 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2780 { 2781 PetscErrorCode ierr; 2782 2783 2784 PetscFunctionBegin; 2785 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2786 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2787 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2788 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2789 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2790 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2791 PetscFunctionReturn(0); 2792 } 2793 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2797 /*@C 2798 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2799 (the default parallel PETSc format). 2800 2801 Collective on MPI_Comm 2802 2803 Input Parameters: 2804 + B - the matrix 2805 . bs - the block size 2806 . i - the indices into j for the start of each local row (starts with zero) 2807 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2808 - v - optional values in the matrix 2809 2810 Level: developer 2811 2812 .keywords: matrix, aij, compressed row, sparse, parallel 2813 2814 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2815 @*/ 2816 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2817 { 2818 PetscErrorCode ierr; 2819 2820 PetscFunctionBegin; 2821 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2822 PetscFunctionReturn(0); 2823 } 2824 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 2827 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2828 { 2829 PetscErrorCode ierr; 2830 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2831 PetscInt *indx; 2832 PetscScalar *values; 2833 2834 PetscFunctionBegin; 2835 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2836 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2837 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2838 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 2839 PetscInt *bindx,rmax=a->rmax,j; 2840 2841 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2842 mbs = m/bs; Nbs = N/cbs; 2843 if (n == PETSC_DECIDE) { 2844 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 2845 } 2846 /* Check sum(n) = Nbs */ 2847 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2848 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 2849 2850 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2851 rstart -= mbs; 2852 2853 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2854 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 2855 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 
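    /* Symbolic phase: for each block row of the sequential input matrix, take the column indices of its first point row, convert them to block column indices, and pass them to MatPreallocateSet() to accumulate the diagonal/off-diagonal preallocation counts dnz/onz */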
2856 for (i=0; i<mbs; i++) { 2857 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2858 nnz = nnz/bs; 2859 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2860 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2861 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2862 } 2863 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2864 ierr = PetscFree(bindx);CHKERRQ(ierr); 2865 2866 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2867 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2868 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2869 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2870 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2871 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2872 } 2873 2874 /* numeric phase */ 2875 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2876 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2877 2878 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2879 for (i=0; i<m; i++) { 2880 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2881 Ii = i + rstart; 2882 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2883 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2884 } 2885 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2886 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2887 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2888 PetscFunctionReturn(0); 2889 } 2890
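
/*
   Example usage (a minimal sketch, not taken from the PETSc documentation: the communicator,
   block size, local sizes and preallocation counts below are hypothetical, and in a real code
   MatSetValuesBlocked() would be called in a loop over the locally owned block rows):

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       bs = 2, mlocal = 4;
     PetscInt       brow = 0, bcol = 1;
     PetscScalar    vals[4] = {1.0, 0.0, 0.0, 1.0};

     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,bs,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValuesBlocked(A,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   Only blocks whose block column index is greater than or equal to the block row index need to be
   inserted; lower triangular input is ignored by the SBAIJ formats.
*/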