1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash){ 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs){ 189 if (a->ignore_ltriangular){ 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 200 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 201 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 202 } else if (in[j] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 205 #endif 206 else { /* off-diag entry (B) */ 207 if (mat->was_assembled) { 208 if (!baij->colmap) { 209 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 210 } 211 #if defined (PETSC_USE_CTABLE) 212 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 213 col = col - 1; 214 #else 215 col = baij->colmap[in[j]/bs] - 1; 216 #endif 217 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 218 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 219 col = in[j]; 220 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 221 B = baij->B; 222 b = (Mat_SeqBAIJ*)(B)->data; 223 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 224 ba=b->a; 225 } else col += in[j]%bs; 226 } else col = in[j]; 227 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = (m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 349 if (!baij->donotstash) { 350 if (roworiented) { 351 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 352 } else { 353 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 354 } 355 } 356 } 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatGetValues_MPISBAIJ" 363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 364 { 365 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 366 PetscErrorCode ierr; 367 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 368 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 369 370 PetscFunctionBegin; 371 for (i=0; i<m; i++) { 372 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 373 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 374 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 375 row = idxm[i] - bsrstart; 376 for (j=0; j<n; j++) { 377 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 378 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 379 if (idxn[j] >= bscstart && idxn[j] < bscend){ 380 col = idxn[j] - bscstart; 381 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 382 } else { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 388 data --; 389 #else 390 data = baij->colmap[idxn[j]/bs]-1; 391 #endif 392 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 393 else { 394 col = data + idxn[j]%bs; 395 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 396 } 397 } 398 } 399 } else { 400 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatNorm_MPISBAIJ" 408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 409 { 410 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscReal sum[2],*lnorm2; 413 414 PetscFunctionBegin; 415 if (baij->size == 1) { 416 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 417 } else { 418 if (type == NORM_FROBENIUS) { 419 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 420 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 421 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 422 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 423 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 424 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 425 *norm = sqrt(sum[0] + 2*sum[1]); 426 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 427 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 428 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 429 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 430 PetscReal *rsum,*rsum2,vabs; 431 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 432 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 433 MatScalar *v; 434 435 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 436 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 437 /* Amat */ 438 v = amat->a; jj = amat->j; 439 for (brow=0; brow<mbs; brow++) { 440 grow = bs*(rstart + brow); 441 nz = amat->i[brow+1] - amat->i[brow]; 442 for (bcol=0; bcol<nz; bcol++){ 443 gcol = bs*(rstart + *jj); jj++; 444 for (col=0; col<bs; col++){ 445 for (row=0; row<bs; row++){ 446 vabs = PetscAbsScalar(*v); v++; 447 rsum[gcol+col] += vabs; 448 /* non-diagonal block */ 449 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 450 } 451 } 452 } 453 } 454 /* Bmat */ 455 v = bmat->a; jj = bmat->j; 456 for (brow=0; brow<mbs; brow++) { 457 grow = bs*(rstart + brow); 458 nz = bmat->i[brow+1] - bmat->i[brow]; 459 for (bcol=0; bcol<nz; bcol++){ 460 gcol = bs*garray[*jj]; jj++; 461 for (col=0; col<bs; col++){ 462 for (row=0; row<bs; row++){ 463 vabs = PetscAbsScalar(*v); v++; 464 rsum[gcol+col] += vabs; 465 rsum[grow+row] += vabs; 466 } 467 } 468 } 469 } 470 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 471 *norm = 0.0; 472 for (col=0; col<mat->cmap->N; col++) { 473 if (rsum2[col] > *norm) *norm = rsum2[col]; 474 } 475 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 476 } else { 477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 486 { 487 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 488 PetscErrorCode ierr; 489 PetscInt nstash,reallocs; 490 InsertMode addv; 491 492 PetscFunctionBegin; 493 if (baij->donotstash || mat->nooffprocentries) { 494 PetscFunctionReturn(0); 495 } 496 497 /* make sure all processors are either in INSERTMODE or ADDMODE */ 498 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 499 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 500 mat->insertmode = addv; /* in case this processor had no cache */ 501 502 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 503 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 504 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 505 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 506 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 507 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 514 { 515 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 516 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 517 PetscErrorCode ierr; 518 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 519 PetscInt *row,*col; 520 PetscBool other_disassembled; 521 PetscMPIInt n; 522 PetscBool r1,r2,r3; 523 MatScalar *val; 524 InsertMode addv = mat->insertmode; 525 526 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 527 PetscFunctionBegin; 528 529 if (!baij->donotstash && !mat->nooffprocentries) { 530 while (1) { 531 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 532 if (!flg) break; 533 534 for (i=0; i<n;) { 535 /* Now identify the consecutive vals belonging to the same row */ 536 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 537 if (j < n) ncols = j-i; 538 else ncols = n-i; 539 /* Now assemble all these values with a single function call */ 540 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 541 i = j; 542 } 543 } 544 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 545 /* Now process the block-stash. Since the values are stashed column-oriented, 546 set the roworiented flag to column oriented, and after MatSetValues() 547 restore the original flags */ 548 r1 = baij->roworiented; 549 r2 = a->roworiented; 550 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 551 baij->roworiented = PETSC_FALSE; 552 a->roworiented = PETSC_FALSE; 553 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 554 while (1) { 555 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 556 if (!flg) break; 557 558 for (i=0; i<n;) { 559 /* Now identify the consecutive vals belonging to the same row */ 560 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 561 if (j < n) ncols = j-i; 562 else ncols = n-i; 563 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 564 i = j; 565 } 566 } 567 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 568 baij->roworiented = r1; 569 a->roworiented = r2; 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 595 596 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 597 baij->rowvalues = 0; 598 599 PetscFunctionReturn(0); 600 } 601 602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 606 { 607 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 608 PetscErrorCode ierr; 609 PetscInt bs = mat->rmap->bs; 610 PetscMPIInt size = baij->size,rank = baij->rank; 611 PetscBool iascii,isdraw; 612 PetscViewer sviewer; 613 PetscViewerFormat format; 614 615 PetscFunctionBegin; 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 617 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 618 if (iascii) { 619 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 621 MatInfo info; 622 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 623 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscBool isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 671 672 /* copy over the A part */ 673 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 675 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 676 677 for (i=0; i<mbs; i++) { 678 rvals[0] = bs*(baij->rstartbs + i); 679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 680 for (j=ai[i]; j<ai[i+1]; j++) { 681 col = (baij->cstartbs+aj[j])*bs; 682 for (k=0; k<bs; k++) { 683 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 684 col++; a += bs; 685 } 686 } 687 } 688 /* copy over the B part */ 689 Bloc = (Mat_SeqBAIJ*)baij->B->data; 690 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 691 for (i=0; i<mbs; i++) { 692 693 rvals[0] = bs*(baij->rstartbs + i); 694 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 695 for (j=ai[i]; j<ai[i+1]; j++) { 696 col = baij->garray[aj[j]]*bs; 697 for (k=0; k<bs; k++) { 698 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 699 col++; a += bs; 700 } 701 } 702 } 703 ierr = PetscFree(rvals);CHKERRQ(ierr); 704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 /* 707 Everyone has to call to draw the matrix since the graphics waits are 708 synchronized across all processors that share the PetscDraw object 709 */ 710 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 713 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 714 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 715 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 716 } 717 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 ierr = MatDestroy(&A);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatView_MPISBAIJ" 725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 726 { 727 PetscErrorCode ierr; 728 PetscBool iascii,isdraw,issocket,isbinary; 729 730 PetscFunctionBegin; 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 735 if (iascii || isdraw || issocket || isbinary) { 736 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 737 } else { 738 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatDestroy_MPISBAIJ" 745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 #if defined(PETSC_USE_LOG) 752 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 753 #endif 754 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 755 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 756 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 757 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 758 #if defined (PETSC_USE_CTABLE) 759 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 760 #else 761 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 762 #endif 763 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 764 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 765 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 767 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 768 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 771 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 772 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 773 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 774 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 776 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 777 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 778 #if defined(PETSC_USE_REAL_MAT_SINGLE) 779 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 780 #endif 781 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 782 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 784 ierr = PetscFree(mat->data);CHKERRQ(ierr); 785 786 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 808 /* diagonal part */ 809 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 810 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 811 812 /* subdiagonal part */ 813 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 814 815 /* copy x into the vec slvec0 */ 816 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 819 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 820 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 /* supperdiagonal part */ 826 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMult_MPISBAIJ" 832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 833 { 834 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 835 PetscErrorCode ierr; 836 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 837 PetscScalar *x,*from; 838 839 PetscFunctionBegin; 840 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 841 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 842 843 /* diagonal part */ 844 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 845 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 846 847 /* subdiagonal part */ 848 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 849 850 /* copy x into the vec slvec0 */ 851 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 854 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* supperdiagonal part */ 861 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 868 { 869 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscInt nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 876 877 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 878 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 879 880 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 /* do diagonal part */ 882 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 883 /* do supperdiagonal part */ 884 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 886 /* do subdiagonal part */ 887 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 888 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 889 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt mbs=a->mbs,bs=A->rmap->bs; 901 PetscScalar *x,*from,zero=0.0; 902 903 PetscFunctionBegin; 904 /* 905 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 906 PetscSynchronizedFlush(((PetscObject)A)->comm); 907 */ 908 /* diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 910 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 911 912 /* subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 914 915 /* copy x into the vec slvec0 */ 916 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 920 921 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 925 /* supperdiagonal part */ 926 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 /* do diagonal part */ 941 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 942 /* do supperdiagonal part */ 943 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 945 946 /* do subdiagonal part */ 947 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 948 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 949 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This only works correctly for square matrices where the subblock A->A is the 956 diagonal block 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1172 info->nz_used = irecv[0]; 1173 info->nz_allocated = irecv[1]; 1174 info->nz_unneeded = irecv[2]; 1175 info->memory = irecv[3]; 1176 info->mallocs = irecv[4]; 1177 } else { 1178 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1179 } 1180 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1181 info->fill_ratio_needed = 0; 1182 info->factor_mallocs = 0; 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1189 { 1190 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1191 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 switch (op) { 1196 case MAT_NEW_NONZERO_LOCATIONS: 1197 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1198 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1199 case MAT_KEEP_NONZERO_PATTERN: 1200 case MAT_NEW_NONZERO_LOCATION_ERR: 1201 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1203 break; 1204 case MAT_ROW_ORIENTED: 1205 a->roworiented = flg; 1206 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1208 break; 1209 case MAT_NEW_DIAGONALS: 1210 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1211 break; 1212 case MAT_IGNORE_OFF_PROC_ENTRIES: 1213 a->donotstash = flg; 1214 break; 1215 case MAT_USE_HASH_TABLE: 1216 a->ht_flag = flg; 1217 break; 1218 case MAT_HERMITIAN: 1219 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1220 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1221 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1222 break; 1223 case MAT_SPD: 1224 A->spd_set = PETSC_TRUE; 1225 A->spd = flg; 1226 if (flg) { 1227 A->symmetric = PETSC_TRUE; 1228 A->structurally_symmetric = PETSC_TRUE; 1229 A->symmetric_set = PETSC_TRUE; 1230 A->structurally_symmetric_set = PETSC_TRUE; 1231 } 1232 break; 1233 case MAT_SYMMETRIC: 1234 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1235 break; 1236 case MAT_STRUCTURALLY_SYMMETRIC: 1237 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1238 break; 1239 case MAT_SYMMETRY_ETERNAL: 1240 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1241 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1242 break; 1243 case MAT_IGNORE_LOWER_TRIANGULAR: 1244 aA->ignore_ltriangular = flg; 1245 break; 1246 case MAT_ERROR_LOWER_TRIANGULAR: 1247 aA->ignore_ltriangular = flg; 1248 break; 1249 case MAT_GETROW_UPPERTRIANGULAR: 1250 aA->getrow_utriangular = flg; 1251 break; 1252 default: 1253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1260 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1261 { 1262 PetscErrorCode ierr; 1263 PetscFunctionBegin; 1264 if (MAT_INITIAL_MATRIX || *B != A) { 1265 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1272 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1273 { 1274 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1275 Mat a=baij->A, b=baij->B; 1276 PetscErrorCode ierr; 1277 PetscInt nv,m,n; 1278 PetscBool flg; 1279 1280 PetscFunctionBegin; 1281 if (ll != rr){ 1282 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1283 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1284 } 1285 if (!ll) PetscFunctionReturn(0); 1286 1287 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1288 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1289 1290 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1291 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1292 1293 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 1295 /* left diagonalscale the off-diagonal part */ 1296 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1297 1298 /* scale the diagonal part */ 1299 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1300 1301 /* right diagonalscale the off-diagonal part */ 1302 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1309 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1310 { 1311 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatEqual_MPISBAIJ" 1323 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1324 { 1325 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1326 Mat a,b,c,d; 1327 PetscBool flg; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 a = matA->A; b = matA->B; 1332 c = matB->A; d = matB->B; 1333 1334 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1335 if (flg) { 1336 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1337 } 1338 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCopy_MPISBAIJ" 1344 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1345 { 1346 PetscErrorCode ierr; 1347 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1348 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1349 1350 PetscFunctionBegin; 1351 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1352 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1353 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1354 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1355 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1356 } else { 1357 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1358 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1359 } 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1365 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1366 { 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1377 { 1378 PetscErrorCode ierr; 1379 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1380 PetscBLASInt bnz,one=1; 1381 Mat_SeqSBAIJ *xa,*ya; 1382 Mat_SeqBAIJ *xb,*yb; 1383 1384 PetscFunctionBegin; 1385 if (str == SAME_NONZERO_PATTERN) { 1386 PetscScalar alpha = a; 1387 xa = (Mat_SeqSBAIJ *)xx->A->data; 1388 ya = (Mat_SeqSBAIJ *)yy->A->data; 1389 bnz = PetscBLASIntCast(xa->nz); 1390 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1391 xb = (Mat_SeqBAIJ *)xx->B->data; 1392 yb = (Mat_SeqBAIJ *)yy->B->data; 1393 bnz = PetscBLASIntCast(xb->nz); 1394 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1395 } else { 1396 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1397 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1398 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1406 { 1407 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1408 PetscInt rbs,cbs; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1413 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1414 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1416 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1417 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1424 { 1425 PetscErrorCode ierr; 1426 PetscInt i; 1427 PetscBool flg; 1428 1429 PetscFunctionBegin; 1430 for (i=0; i<n; i++) { 1431 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1432 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1433 } 1434 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 1439 /* -------------------------------------------------------------------*/ 1440 static struct _MatOps MatOps_Values = { 1441 MatSetValues_MPISBAIJ, 1442 MatGetRow_MPISBAIJ, 1443 MatRestoreRow_MPISBAIJ, 1444 MatMult_MPISBAIJ, 1445 /* 4*/ MatMultAdd_MPISBAIJ, 1446 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1447 MatMultAdd_MPISBAIJ, 1448 0, 1449 0, 1450 0, 1451 /*10*/ 0, 1452 0, 1453 0, 1454 MatSOR_MPISBAIJ, 1455 MatTranspose_MPISBAIJ, 1456 /*15*/ MatGetInfo_MPISBAIJ, 1457 MatEqual_MPISBAIJ, 1458 MatGetDiagonal_MPISBAIJ, 1459 MatDiagonalScale_MPISBAIJ, 1460 MatNorm_MPISBAIJ, 1461 /*20*/ MatAssemblyBegin_MPISBAIJ, 1462 MatAssemblyEnd_MPISBAIJ, 1463 MatSetOption_MPISBAIJ, 1464 MatZeroEntries_MPISBAIJ, 1465 /*24*/ 0, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1471 0, 1472 0, 1473 0, 1474 0, 1475 /*34*/ MatDuplicate_MPISBAIJ, 1476 0, 1477 0, 1478 0, 1479 0, 1480 /*39*/ MatAXPY_MPISBAIJ, 1481 MatGetSubMatrices_MPISBAIJ, 1482 MatIncreaseOverlap_MPISBAIJ, 1483 MatGetValues_MPISBAIJ, 1484 MatCopy_MPISBAIJ, 1485 /*44*/ 0, 1486 MatScale_MPISBAIJ, 1487 0, 1488 0, 1489 0, 1490 /*49*/ MatSetBlockSize_MPISBAIJ, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*54*/ 0, 1496 0, 1497 MatSetUnfactored_MPISBAIJ, 1498 0, 1499 MatSetValuesBlocked_MPISBAIJ, 1500 /*59*/ 0, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*64*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*74*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*79*/ 0, 1521 0, 1522 0, 1523 0, 1524 MatLoad_MPISBAIJ, 1525 /*84*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*89*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*94*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*99*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*104*/0, 1546 MatRealPart_MPISBAIJ, 1547 MatImaginaryPart_MPISBAIJ, 1548 MatGetRowUpperTriangular_MPISBAIJ, 1549 MatRestoreRowUpperTriangular_MPISBAIJ, 1550 /*109*/0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*114*/0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*119*/0, 1561 0, 1562 0, 1563 0 1564 }; 1565 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1570 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1571 { 1572 PetscFunctionBegin; 1573 *a = ((Mat_MPISBAIJ *)A->data)->A; 1574 PetscFunctionReturn(0); 1575 } 1576 EXTERN_C_END 1577 1578 EXTERN_C_BEGIN 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1581 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1582 { 1583 Mat_MPISBAIJ *b; 1584 PetscErrorCode ierr; 1585 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1586 1587 PetscFunctionBegin; 1588 if (bs < 0){ 1589 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1590 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1591 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1592 bs = PetscAbs(bs); 1593 } 1594 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1595 bs = newbs; 1596 1597 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1598 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1599 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1600 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1601 1602 B->rmap->bs = B->cmap->bs = bs; 1603 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1604 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1605 1606 if (d_nnz) { 1607 for (i=0; i<B->rmap->n/bs; i++) { 1608 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1609 } 1610 } 1611 if (o_nnz) { 1612 for (i=0; i<B->rmap->n/bs; i++) { 1613 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1614 } 1615 } 1616 1617 b = (Mat_MPISBAIJ*)B->data; 1618 mbs = B->rmap->n/bs; 1619 Mbs = B->rmap->N/bs; 1620 if (mbs*bs != B->rmap->n) { 1621 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1622 } 1623 1624 B->rmap->bs = bs; 1625 b->bs2 = bs*bs; 1626 b->mbs = mbs; 1627 b->nbs = mbs; 1628 b->Mbs = Mbs; 1629 b->Nbs = Mbs; 1630 1631 for (i=0; i<=b->size; i++) { 1632 b->rangebs[i] = B->rmap->range[i]/bs; 1633 } 1634 b->rstartbs = B->rmap->rstart/bs; 1635 b->rendbs = B->rmap->rend/bs; 1636 1637 b->cstartbs = B->cmap->rstart/bs; 1638 b->cendbs = B->cmap->rend/bs; 1639 1640 if (!B->preallocated) { 1641 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1642 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1643 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1644 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1645 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1646 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1647 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1648 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1649 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1650 } 1651 1652 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1653 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1654 B->preallocated = PETSC_TRUE; 1655 PetscFunctionReturn(0); 1656 } 1657 EXTERN_C_END 1658 1659 EXTERN_C_BEGIN 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1662 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1663 { 1664 PetscInt m,rstart,cstart,cend; 1665 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1666 const PetscInt *JJ=0; 1667 PetscScalar *values=0; 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 1672 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1673 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1674 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1675 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1676 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1677 m = B->rmap->n/bs; 1678 rstart = B->rmap->rstart/bs; 1679 cstart = B->cmap->rstart/bs; 1680 cend = B->cmap->rend/bs; 1681 1682 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1683 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1684 for (i=0; i<m; i++) { 1685 nz = ii[i+1] - ii[i]; 1686 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1687 nz_max = PetscMax(nz_max,nz); 1688 JJ = jj + ii[i]; 1689 for (j=0; j<nz; j++) { 1690 if (*JJ >= cstart) break; 1691 JJ++; 1692 } 1693 d = 0; 1694 for (; j<nz; j++) { 1695 if (*JJ++ >= cend) break; 1696 d++; 1697 } 1698 d_nnz[i] = d; 1699 o_nnz[i] = nz - d; 1700 } 1701 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1702 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1703 1704 values = (PetscScalar*)V; 1705 if (!values) { 1706 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1707 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1708 } 1709 for (i=0; i<m; i++) { 1710 PetscInt row = i + rstart; 1711 PetscInt ncols = ii[i+1] - ii[i]; 1712 const PetscInt *icols = jj + ii[i]; 1713 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1714 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 1717 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1718 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 EXTERN_C_BEGIN 1726 #if defined(PETSC_HAVE_MUMPS) 1727 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1728 #endif 1729 #if defined(PETSC_HAVE_SPOOLES) 1730 extern PetscErrorCode MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1731 #endif 1732 #if defined(PETSC_HAVE_PASTIX) 1733 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1734 #endif 1735 EXTERN_C_END 1736 1737 /*MC 1738 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1739 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1740 the matrix is stored. 1741 1742 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1743 can call MatSetOption(Mat, MAT_HERMITIAN); 1744 1745 Options Database Keys: 1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1747 1748 Level: beginner 1749 1750 .seealso: MatCreateMPISBAIJ 1751 M*/ 1752 1753 EXTERN_C_BEGIN 1754 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*); 1755 EXTERN_C_END 1756 1757 EXTERN_C_BEGIN 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatCreate_MPISBAIJ" 1760 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1761 { 1762 Mat_MPISBAIJ *b; 1763 PetscErrorCode ierr; 1764 PetscBool flg; 1765 1766 PetscFunctionBegin; 1767 1768 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1769 B->data = (void*)b; 1770 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1771 1772 B->ops->destroy = MatDestroy_MPISBAIJ; 1773 B->ops->view = MatView_MPISBAIJ; 1774 B->assembled = PETSC_FALSE; 1775 1776 B->insertmode = NOT_SET_VALUES; 1777 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1778 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1779 1780 /* build local table of row and column ownerships */ 1781 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1782 1783 /* build cache for off array entries formed */ 1784 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1785 b->donotstash = PETSC_FALSE; 1786 b->colmap = PETSC_NULL; 1787 b->garray = PETSC_NULL; 1788 b->roworiented = PETSC_TRUE; 1789 1790 /* stuff used in block assembly */ 1791 b->barray = 0; 1792 1793 /* stuff used for matrix vector multiply */ 1794 b->lvec = 0; 1795 b->Mvctx = 0; 1796 b->slvec0 = 0; 1797 b->slvec0b = 0; 1798 b->slvec1 = 0; 1799 b->slvec1a = 0; 1800 b->slvec1b = 0; 1801 b->sMvctx = 0; 1802 1803 /* stuff for MatGetRow() */ 1804 b->rowindices = 0; 1805 b->rowvalues = 0; 1806 b->getrowactive = PETSC_FALSE; 1807 1808 /* hash table stuff */ 1809 b->ht = 0; 1810 b->hd = 0; 1811 b->ht_size = 0; 1812 b->ht_flag = PETSC_FALSE; 1813 b->ht_fact = 0; 1814 b->ht_total_ct = 0; 1815 b->ht_insert_ct = 0; 1816 1817 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1818 b->ijonly = PETSC_FALSE; 1819 1820 b->in_loc = 0; 1821 b->v_loc = 0; 1822 b->n_loc = 0; 1823 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1824 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1825 if (flg) { 1826 PetscReal fact = 1.39; 1827 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1829 if (fact <= 1.0) fact = 1.39; 1830 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1831 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1832 } 1833 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1834 1835 #if defined(PETSC_HAVE_PASTIX) 1836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1837 "MatGetFactor_mpisbaij_pastix", 1838 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1839 #endif 1840 #if defined(PETSC_HAVE_MUMPS) 1841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1842 "MatGetFactor_sbaij_mumps", 1843 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1844 #endif 1845 #if defined(PETSC_HAVE_SPOOLES) 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1847 "MatGetFactor_mpisbaij_spooles", 1848 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1849 #endif 1850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1851 "MatStoreValues_MPISBAIJ", 1852 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1854 "MatRetrieveValues_MPISBAIJ", 1855 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1857 "MatGetDiagonalBlock_MPISBAIJ", 1858 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1860 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1861 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1863 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1864 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1865 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C", 1866 "MatConvert_MPISBAIJ_MPISBSTRM", 1867 MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1868 1869 B->symmetric = PETSC_TRUE; 1870 B->structurally_symmetric = PETSC_TRUE; 1871 B->symmetric_set = PETSC_TRUE; 1872 B->structurally_symmetric_set = PETSC_TRUE; 1873 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 EXTERN_C_END 1877 1878 /*MC 1879 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1880 1881 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1882 and MATMPISBAIJ otherwise. 1883 1884 Options Database Keys: 1885 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1886 1887 Level: beginner 1888 1889 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1890 M*/ 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1894 /*@C 1895 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1896 the user should preallocate the matrix storage by setting the parameters 1897 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1898 performance can be increased by more than a factor of 50. 1899 1900 Collective on Mat 1901 1902 Input Parameters: 1903 + A - the matrix 1904 . bs - size of blockk 1905 . d_nz - number of block nonzeros per block row in diagonal portion of local 1906 submatrix (same for all local rows) 1907 . d_nnz - array containing the number of block nonzeros in the various block rows 1908 in the upper triangular and diagonal part of the in diagonal portion of the local 1909 (possibly different for each block row) or PETSC_NULL. You must leave room 1910 for the diagonal entry even if it is zero. 1911 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1912 submatrix (same for all local rows). 1913 - o_nnz - array containing the number of nonzeros in the various block rows of the 1914 off-diagonal portion of the local submatrix (possibly different for 1915 each block row) or PETSC_NULL. 1916 1917 1918 Options Database Keys: 1919 . -mat_no_unroll - uses code that does not unroll the loops in the 1920 block calculations (much slower) 1921 . -mat_block_size - size of the blocks to use 1922 1923 Notes: 1924 1925 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1926 than it must be used on all processors that share the object for that argument. 1927 1928 If the *_nnz parameter is given then the *_nz parameter is ignored 1929 1930 Storage Information: 1931 For a square global matrix we define each processor's diagonal portion 1932 to be its local rows and the corresponding columns (a square submatrix); 1933 each processor's off-diagonal portion encompasses the remainder of the 1934 local matrix (a rectangular submatrix). 1935 1936 The user can specify preallocated storage for the diagonal part of 1937 the local submatrix with either d_nz or d_nnz (not both). Set 1938 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1939 memory allocation. Likewise, specify preallocated storage for the 1940 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1941 1942 You can call MatGetInfo() to get information on how effective the preallocation was; 1943 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1944 You can also run with the option -info and look for messages with the string 1945 malloc in them to see if additional memory allocation was needed. 1946 1947 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1948 the figure below we depict these three local rows and all columns (0-11). 1949 1950 .vb 1951 0 1 2 3 4 5 6 7 8 9 10 11 1952 ------------------- 1953 row 3 | o o o d d d o o o o o o 1954 row 4 | o o o d d d o o o o o o 1955 row 5 | o o o d d d o o o o o o 1956 ------------------- 1957 .ve 1958 1959 Thus, any entries in the d locations are stored in the d (diagonal) 1960 submatrix, and any entries in the o locations are stored in the 1961 o (off-diagonal) submatrix. Note that the d matrix is stored in 1962 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1963 1964 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1965 plus the diagonal part of the d matrix, 1966 and o_nz should indicate the number of block nonzeros per row in the upper triangular 1967 part of the o matrix. 1968 In general, for PDE problems in which most nonzeros are near the diagonal, 1969 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1970 or you will get TERRIBLE performance; see the users' manual chapter on 1971 matrices. 1972 1973 Level: intermediate 1974 1975 .keywords: matrix, block, aij, compressed row, sparse, parallel 1976 1977 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1978 @*/ 1979 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1980 { 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatCreateMPISBAIJ" 1990 /*@C 1991 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1992 (block compressed row). For good matrix assembly performance 1993 the user should preallocate the matrix storage by setting the parameters 1994 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1995 performance can be increased by more than a factor of 50. 1996 1997 Collective on MPI_Comm 1998 1999 Input Parameters: 2000 + comm - MPI communicator 2001 . bs - size of blockk 2002 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2003 This value should be the same as the local size used in creating the 2004 y vector for the matrix-vector product y = Ax. 2005 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2006 This value should be the same as the local size used in creating the 2007 x vector for the matrix-vector product y = Ax. 2008 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2009 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2010 . d_nz - number of block nonzeros per block row in diagonal portion of local 2011 submatrix (same for all local rows) 2012 . d_nnz - array containing the number of block nonzeros in the various block rows 2013 in the upper triangular portion of the in diagonal portion of the local 2014 (possibly different for each block block row) or PETSC_NULL. 2015 You must leave room for the diagonal entry even if it is zero. 2016 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2017 submatrix (same for all local rows). 2018 - o_nnz - array containing the number of nonzeros in the various block rows of the 2019 off-diagonal portion of the local submatrix (possibly different for 2020 each block row) or PETSC_NULL. 2021 2022 Output Parameter: 2023 . A - the matrix 2024 2025 Options Database Keys: 2026 . -mat_no_unroll - uses code that does not unroll the loops in the 2027 block calculations (much slower) 2028 . -mat_block_size - size of the blocks to use 2029 . -mat_mpi - use the parallel matrix data structures even on one processor 2030 (defaults to using SeqBAIJ format on one processor) 2031 2032 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2033 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2034 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2035 2036 Notes: 2037 The number of rows and columns must be divisible by blocksize. 2038 This matrix type does not support complex Hermitian operation. 2039 2040 The user MUST specify either the local or global matrix dimensions 2041 (possibly both). 2042 2043 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2044 than it must be used on all processors that share the object for that argument. 2045 2046 If the *_nnz parameter is given then the *_nz parameter is ignored 2047 2048 Storage Information: 2049 For a square global matrix we define each processor's diagonal portion 2050 to be its local rows and the corresponding columns (a square submatrix); 2051 each processor's off-diagonal portion encompasses the remainder of the 2052 local matrix (a rectangular submatrix). 2053 2054 The user can specify preallocated storage for the diagonal part of 2055 the local submatrix with either d_nz or d_nnz (not both). Set 2056 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2057 memory allocation. Likewise, specify preallocated storage for the 2058 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2059 2060 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2061 the figure below we depict these three local rows and all columns (0-11). 2062 2063 .vb 2064 0 1 2 3 4 5 6 7 8 9 10 11 2065 ------------------- 2066 row 3 | o o o d d d o o o o o o 2067 row 4 | o o o d d d o o o o o o 2068 row 5 | o o o d d d o o o o o o 2069 ------------------- 2070 .ve 2071 2072 Thus, any entries in the d locations are stored in the d (diagonal) 2073 submatrix, and any entries in the o locations are stored in the 2074 o (off-diagonal) submatrix. Note that the d matrix is stored in 2075 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2076 2077 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2078 plus the diagonal part of the d matrix, 2079 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2080 In general, for PDE problems in which most nonzeros are near the diagonal, 2081 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2082 or you will get TERRIBLE performance; see the users' manual chapter on 2083 matrices. 2084 2085 Level: intermediate 2086 2087 .keywords: matrix, block, aij, compressed row, sparse, parallel 2088 2089 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2090 @*/ 2091 2092 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2093 { 2094 PetscErrorCode ierr; 2095 PetscMPIInt size; 2096 2097 PetscFunctionBegin; 2098 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2099 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 if (size > 1) { 2102 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2103 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2104 } else { 2105 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2106 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2107 } 2108 PetscFunctionReturn(0); 2109 } 2110 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2114 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2115 { 2116 Mat mat; 2117 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2118 PetscErrorCode ierr; 2119 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2120 PetscScalar *array; 2121 2122 PetscFunctionBegin; 2123 *newmat = 0; 2124 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2125 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2126 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2127 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2128 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2129 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2130 2131 mat->factortype = matin->factortype; 2132 mat->preallocated = PETSC_TRUE; 2133 mat->assembled = PETSC_TRUE; 2134 mat->insertmode = NOT_SET_VALUES; 2135 2136 a = (Mat_MPISBAIJ*)mat->data; 2137 a->bs2 = oldmat->bs2; 2138 a->mbs = oldmat->mbs; 2139 a->nbs = oldmat->nbs; 2140 a->Mbs = oldmat->Mbs; 2141 a->Nbs = oldmat->Nbs; 2142 2143 2144 a->size = oldmat->size; 2145 a->rank = oldmat->rank; 2146 a->donotstash = oldmat->donotstash; 2147 a->roworiented = oldmat->roworiented; 2148 a->rowindices = 0; 2149 a->rowvalues = 0; 2150 a->getrowactive = PETSC_FALSE; 2151 a->barray = 0; 2152 a->rstartbs = oldmat->rstartbs; 2153 a->rendbs = oldmat->rendbs; 2154 a->cstartbs = oldmat->cstartbs; 2155 a->cendbs = oldmat->cendbs; 2156 2157 /* hash table stuff */ 2158 a->ht = 0; 2159 a->hd = 0; 2160 a->ht_size = 0; 2161 a->ht_flag = oldmat->ht_flag; 2162 a->ht_fact = oldmat->ht_fact; 2163 a->ht_total_ct = 0; 2164 a->ht_insert_ct = 0; 2165 2166 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2167 if (oldmat->colmap) { 2168 #if defined (PETSC_USE_CTABLE) 2169 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2170 #else 2171 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2172 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2173 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2174 #endif 2175 } else a->colmap = 0; 2176 2177 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2178 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2179 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2180 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2181 } else a->garray = 0; 2182 2183 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2184 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2185 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2186 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2187 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2188 2189 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2190 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2191 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2192 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2193 2194 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2195 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2196 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2197 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2198 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2199 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2200 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2201 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2202 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2203 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2204 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2205 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2206 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2207 2208 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2209 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2210 a->sMvctx = oldmat->sMvctx; 2211 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2212 2213 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2214 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2215 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2216 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2217 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2218 *newmat = mat; 2219 PetscFunctionReturn(0); 2220 } 2221 2222 #undef __FUNCT__ 2223 #define __FUNCT__ "MatLoad_MPISBAIJ" 2224 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2225 { 2226 PetscErrorCode ierr; 2227 PetscInt i,nz,j,rstart,rend; 2228 PetscScalar *vals,*buf; 2229 MPI_Comm comm = ((PetscObject)viewer)->comm; 2230 MPI_Status status; 2231 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2232 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2233 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2234 PetscInt bs=1,Mbs,mbs,extra_rows; 2235 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2236 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2237 int fd; 2238 2239 PetscFunctionBegin; 2240 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2241 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2242 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2243 2244 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2245 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2246 if (!rank) { 2247 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2248 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2249 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2250 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2251 } 2252 2253 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2254 2255 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2256 M = header[1]; N = header[2]; 2257 2258 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2259 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2260 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2261 2262 /* If global sizes are set, check if they are consistent with that given in the file */ 2263 if (sizesset) { 2264 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2265 } 2266 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2267 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2268 2269 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2270 2271 /* 2272 This code adds extra rows to make sure the number of rows is 2273 divisible by the blocksize 2274 */ 2275 Mbs = M/bs; 2276 extra_rows = bs - M + bs*(Mbs); 2277 if (extra_rows == bs) extra_rows = 0; 2278 else Mbs++; 2279 if (extra_rows &&!rank) { 2280 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2281 } 2282 2283 /* determine ownership of all rows */ 2284 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2285 mbs = Mbs/size + ((Mbs % size) > rank); 2286 m = mbs*bs; 2287 } else { /* User Set */ 2288 m = newmat->rmap->n; 2289 mbs = m/bs; 2290 } 2291 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2292 mmbs = PetscMPIIntCast(mbs); 2293 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2294 rowners[0] = 0; 2295 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2296 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2297 rstart = rowners[rank]; 2298 rend = rowners[rank+1]; 2299 2300 /* distribute row lengths to all processors */ 2301 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2302 if (!rank) { 2303 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2304 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2305 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2306 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2307 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2308 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2309 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2310 } else { 2311 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2312 } 2313 2314 if (!rank) { /* procs[0] */ 2315 /* calculate the number of nonzeros on each processor */ 2316 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2317 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2318 for (i=0; i<size; i++) { 2319 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2320 procsnz[i] += rowlengths[j]; 2321 } 2322 } 2323 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2324 2325 /* determine max buffer needed and allocate it */ 2326 maxnz = 0; 2327 for (i=0; i<size; i++) { 2328 maxnz = PetscMax(maxnz,procsnz[i]); 2329 } 2330 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2331 2332 /* read in my part of the matrix column indices */ 2333 nz = procsnz[0]; 2334 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2335 mycols = ibuf; 2336 if (size == 1) nz -= extra_rows; 2337 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2338 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2339 2340 /* read in every ones (except the last) and ship off */ 2341 for (i=1; i<size-1; i++) { 2342 nz = procsnz[i]; 2343 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2344 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2345 } 2346 /* read in the stuff for the last proc */ 2347 if (size != 1) { 2348 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2349 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2350 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2351 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2352 } 2353 ierr = PetscFree(cols);CHKERRQ(ierr); 2354 } else { /* procs[i], i>0 */ 2355 /* determine buffer space needed for message */ 2356 nz = 0; 2357 for (i=0; i<m; i++) { 2358 nz += locrowlens[i]; 2359 } 2360 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2361 mycols = ibuf; 2362 /* receive message of column indices*/ 2363 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2364 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2365 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2366 } 2367 2368 /* loop over local rows, determining number of off diagonal entries */ 2369 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2370 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2371 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2372 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2373 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2374 rowcount = 0; 2375 nzcount = 0; 2376 for (i=0; i<mbs; i++) { 2377 dcount = 0; 2378 odcount = 0; 2379 for (j=0; j<bs; j++) { 2380 kmax = locrowlens[rowcount]; 2381 for (k=0; k<kmax; k++) { 2382 tmp = mycols[nzcount++]/bs; /* block col. index */ 2383 if (!mask[tmp]) { 2384 mask[tmp] = 1; 2385 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2386 else masked1[dcount++] = tmp; /* entry in diag portion */ 2387 } 2388 } 2389 rowcount++; 2390 } 2391 2392 dlens[i] = dcount; /* d_nzz[i] */ 2393 odlens[i] = odcount; /* o_nzz[i] */ 2394 2395 /* zero out the mask elements we set */ 2396 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2397 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2398 } 2399 if (!sizesset) { 2400 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2401 } 2402 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2403 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2404 2405 if (!rank) { 2406 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2407 /* read in my part of the matrix numerical values */ 2408 nz = procsnz[0]; 2409 vals = buf; 2410 mycols = ibuf; 2411 if (size == 1) nz -= extra_rows; 2412 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2413 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2414 2415 /* insert into matrix */ 2416 jj = rstart*bs; 2417 for (i=0; i<m; i++) { 2418 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2419 mycols += locrowlens[i]; 2420 vals += locrowlens[i]; 2421 jj++; 2422 } 2423 2424 /* read in other processors (except the last one) and ship out */ 2425 for (i=1; i<size-1; i++) { 2426 nz = procsnz[i]; 2427 vals = buf; 2428 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2429 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2430 } 2431 /* the last proc */ 2432 if (size != 1){ 2433 nz = procsnz[i] - extra_rows; 2434 vals = buf; 2435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2436 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2437 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2438 } 2439 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2440 2441 } else { 2442 /* receive numeric values */ 2443 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2444 2445 /* receive message of values*/ 2446 vals = buf; 2447 mycols = ibuf; 2448 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2449 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2450 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2451 2452 /* insert into matrix */ 2453 jj = rstart*bs; 2454 for (i=0; i<m; i++) { 2455 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2456 mycols += locrowlens[i]; 2457 vals += locrowlens[i]; 2458 jj++; 2459 } 2460 } 2461 2462 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2463 ierr = PetscFree(buf);CHKERRQ(ierr); 2464 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2465 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2466 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2467 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2468 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2469 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2475 /*XXXXX@ 2476 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2477 2478 Input Parameters: 2479 . mat - the matrix 2480 . fact - factor 2481 2482 Not Collective on Mat, each process can have a different hash factor 2483 2484 Level: advanced 2485 2486 Notes: 2487 This can also be set by the command line option: -mat_use_hash_table fact 2488 2489 .keywords: matrix, hashtable, factor, HT 2490 2491 .seealso: MatSetOption() 2492 @XXXXX*/ 2493 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2497 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2498 { 2499 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2500 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2501 PetscReal atmp; 2502 PetscReal *work,*svalues,*rvalues; 2503 PetscErrorCode ierr; 2504 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2505 PetscMPIInt rank,size; 2506 PetscInt *rowners_bs,dest,count,source; 2507 PetscScalar *va; 2508 MatScalar *ba; 2509 MPI_Status stat; 2510 2511 PetscFunctionBegin; 2512 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2513 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2514 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2515 2516 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2517 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2518 2519 bs = A->rmap->bs; 2520 mbs = a->mbs; 2521 Mbs = a->Mbs; 2522 ba = b->a; 2523 bi = b->i; 2524 bj = b->j; 2525 2526 /* find ownerships */ 2527 rowners_bs = A->rmap->range; 2528 2529 /* each proc creates an array to be distributed */ 2530 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2531 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2532 2533 /* row_max for B */ 2534 if (rank != size-1){ 2535 for (i=0; i<mbs; i++) { 2536 ncols = bi[1] - bi[0]; bi++; 2537 brow = bs*i; 2538 for (j=0; j<ncols; j++){ 2539 bcol = bs*(*bj); 2540 for (kcol=0; kcol<bs; kcol++){ 2541 col = bcol + kcol; /* local col index */ 2542 col += rowners_bs[rank+1]; /* global col index */ 2543 for (krow=0; krow<bs; krow++){ 2544 atmp = PetscAbsScalar(*ba); ba++; 2545 row = brow + krow; /* local row index */ 2546 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2547 if (work[col] < atmp) work[col] = atmp; 2548 } 2549 } 2550 bj++; 2551 } 2552 } 2553 2554 /* send values to its owners */ 2555 for (dest=rank+1; dest<size; dest++){ 2556 svalues = work + rowners_bs[dest]; 2557 count = rowners_bs[dest+1]-rowners_bs[dest]; 2558 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2559 } 2560 } 2561 2562 /* receive values */ 2563 if (rank){ 2564 rvalues = work; 2565 count = rowners_bs[rank+1]-rowners_bs[rank]; 2566 for (source=0; source<rank; source++){ 2567 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2568 /* process values */ 2569 for (i=0; i<count; i++){ 2570 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2571 } 2572 } 2573 } 2574 2575 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2576 ierr = PetscFree(work);CHKERRQ(ierr); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 #undef __FUNCT__ 2581 #define __FUNCT__ "MatSOR_MPISBAIJ" 2582 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2583 { 2584 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2585 PetscErrorCode ierr; 2586 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2587 PetscScalar *x,*ptr,*from; 2588 Vec bb1; 2589 const PetscScalar *b; 2590 2591 PetscFunctionBegin; 2592 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2593 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2594 2595 if (flag == SOR_APPLY_UPPER) { 2596 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2601 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2603 its--; 2604 } 2605 2606 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2607 while (its--){ 2608 2609 /* lower triangular part: slvec0b = - B^T*xx */ 2610 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2611 2612 /* copy xx into slvec0a */ 2613 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2615 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2616 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2617 2618 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2619 2620 /* copy bb into slvec1a */ 2621 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2622 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2623 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2624 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2625 2626 /* set slvec1b = 0 */ 2627 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2628 2629 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2630 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2631 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2632 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2633 2634 /* upper triangular part: bb1 = bb1 - B*x */ 2635 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2636 2637 /* local diagonal sweep */ 2638 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2639 } 2640 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2641 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2642 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2643 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2644 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2645 } else if (flag & SOR_EISENSTAT) { 2646 Vec xx1; 2647 PetscBool hasop; 2648 const PetscScalar *diag; 2649 PetscScalar *sl,scale = (omega - 2.0)/omega; 2650 PetscInt i,n; 2651 2652 if (!mat->xx1) { 2653 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2654 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2655 } 2656 xx1 = mat->xx1; 2657 bb1 = mat->bb1; 2658 2659 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2660 2661 if (!mat->diag) { 2662 /* this is wrong for same matrix with new nonzero values */ 2663 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2664 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2665 } 2666 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2667 2668 if (hasop) { 2669 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2670 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2671 } else { 2672 /* 2673 These two lines are replaced by code that may be a bit faster for a good compiler 2674 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2675 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2676 */ 2677 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2678 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2679 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2680 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2681 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2682 if (omega == 1.0) { 2683 for (i=0; i<n; i++) { 2684 sl[i] = b[i] - diag[i]*x[i]; 2685 } 2686 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2687 } else { 2688 for (i=0; i<n; i++) { 2689 sl[i] = b[i] + scale*diag[i]*x[i]; 2690 } 2691 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2692 } 2693 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2694 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2695 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2696 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2697 } 2698 2699 /* multiply off-diagonal portion of matrix */ 2700 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2701 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2702 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2703 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2704 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2705 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2706 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2707 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2708 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2709 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2710 2711 /* local sweep */ 2712 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2713 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2714 } else { 2715 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2716 } 2717 PetscFunctionReturn(0); 2718 } 2719 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2722 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2723 { 2724 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2725 PetscErrorCode ierr; 2726 Vec lvec1,bb1; 2727 2728 PetscFunctionBegin; 2729 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2730 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2731 2732 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2733 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2734 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2735 its--; 2736 } 2737 2738 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2739 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2740 while (its--){ 2741 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2742 2743 /* lower diagonal part: bb1 = bb - B^T*xx */ 2744 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2745 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2746 2747 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2748 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2749 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2750 2751 /* upper diagonal part: bb1 = bb1 - B*x */ 2752 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2753 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2754 2755 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2756 2757 /* diagonal sweep */ 2758 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2759 } 2760 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2761 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2762 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2763 PetscFunctionReturn(0); 2764 } 2765 2766 #undef __FUNCT__ 2767 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2768 /*@ 2769 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2770 CSR format the local rows. 2771 2772 Collective on MPI_Comm 2773 2774 Input Parameters: 2775 + comm - MPI communicator 2776 . bs - the block size, only a block size of 1 is supported 2777 . m - number of local rows (Cannot be PETSC_DECIDE) 2778 . n - This value should be the same as the local size used in creating the 2779 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2780 calculated if N is given) For square matrices n is almost always m. 2781 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2782 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2783 . i - row indices 2784 . j - column indices 2785 - a - matrix values 2786 2787 Output Parameter: 2788 . mat - the matrix 2789 2790 Level: intermediate 2791 2792 Notes: 2793 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2794 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2795 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2796 2797 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2798 2799 .keywords: matrix, aij, compressed row, sparse, parallel 2800 2801 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2802 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2803 @*/ 2804 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2805 { 2806 PetscErrorCode ierr; 2807 2808 2809 PetscFunctionBegin; 2810 if (i[0]) { 2811 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2812 } 2813 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2814 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2815 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2816 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2817 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2818 PetscFunctionReturn(0); 2819 } 2820 2821 2822 #undef __FUNCT__ 2823 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2824 /*@C 2825 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2826 (the default parallel PETSc format). 2827 2828 Collective on MPI_Comm 2829 2830 Input Parameters: 2831 + A - the matrix 2832 . bs - the block size 2833 . i - the indices into j for the start of each local row (starts with zero) 2834 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2835 - v - optional values in the matrix 2836 2837 Level: developer 2838 2839 .keywords: matrix, aij, compressed row, sparse, parallel 2840 2841 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2842 @*/ 2843 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2844 { 2845 PetscErrorCode ierr; 2846 2847 PetscFunctionBegin; 2848 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2849 PetscFunctionReturn(0); 2850 } 2851 2852 2853