1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash){ 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs){ 189 if (a->ignore_ltriangular){ 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 200 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 201 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 202 } else if (in[j] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 205 #endif 206 else { /* off-diag entry (B) */ 207 if (mat->was_assembled) { 208 if (!baij->colmap) { 209 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 210 } 211 #if defined (PETSC_USE_CTABLE) 212 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 213 col = col - 1; 214 #else 215 col = baij->colmap[in[j]/bs] - 1; 216 #endif 217 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 218 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 219 col = in[j]; 220 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 221 B = baij->B; 222 b = (Mat_SeqBAIJ*)(B)->data; 223 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 224 ba=b->a; 225 } else col += in[j]%bs; 226 } else col = in[j]; 227 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = (m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 349 if (!baij->donotstash) { 350 if (roworiented) { 351 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 352 } else { 353 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 354 } 355 } 356 } 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatGetValues_MPISBAIJ" 363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 364 { 365 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 366 PetscErrorCode ierr; 367 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 368 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 369 370 PetscFunctionBegin; 371 for (i=0; i<m; i++) { 372 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 373 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 374 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 375 row = idxm[i] - bsrstart; 376 for (j=0; j<n; j++) { 377 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 378 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 379 if (idxn[j] >= bscstart && idxn[j] < bscend){ 380 col = idxn[j] - bscstart; 381 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 382 } else { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 388 data --; 389 #else 390 data = baij->colmap[idxn[j]/bs]-1; 391 #endif 392 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 393 else { 394 col = data + idxn[j]%bs; 395 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 396 } 397 } 398 } 399 } else { 400 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatNorm_MPISBAIJ" 408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 409 { 410 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscReal sum[2],*lnorm2; 413 414 PetscFunctionBegin; 415 if (baij->size == 1) { 416 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 417 } else { 418 if (type == NORM_FROBENIUS) { 419 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 420 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 421 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 422 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 423 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 424 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 425 *norm = sqrt(sum[0] + 2*sum[1]); 426 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 427 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 428 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 429 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 430 PetscReal *rsum,*rsum2,vabs; 431 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 432 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 433 MatScalar *v; 434 435 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 436 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 437 /* Amat */ 438 v = amat->a; jj = amat->j; 439 for (brow=0; brow<mbs; brow++) { 440 grow = bs*(rstart + brow); 441 nz = amat->i[brow+1] - amat->i[brow]; 442 for (bcol=0; bcol<nz; bcol++){ 443 gcol = bs*(rstart + *jj); jj++; 444 for (col=0; col<bs; col++){ 445 for (row=0; row<bs; row++){ 446 vabs = PetscAbsScalar(*v); v++; 447 rsum[gcol+col] += vabs; 448 /* non-diagonal block */ 449 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 450 } 451 } 452 } 453 } 454 /* Bmat */ 455 v = bmat->a; jj = bmat->j; 456 for (brow=0; brow<mbs; brow++) { 457 grow = bs*(rstart + brow); 458 nz = bmat->i[brow+1] - bmat->i[brow]; 459 for (bcol=0; bcol<nz; bcol++){ 460 gcol = bs*garray[*jj]; jj++; 461 for (col=0; col<bs; col++){ 462 for (row=0; row<bs; row++){ 463 vabs = PetscAbsScalar(*v); v++; 464 rsum[gcol+col] += vabs; 465 rsum[grow+row] += vabs; 466 } 467 } 468 } 469 } 470 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 471 *norm = 0.0; 472 for (col=0; col<mat->cmap->N; col++) { 473 if (rsum2[col] > *norm) *norm = rsum2[col]; 474 } 475 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 476 } else { 477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 486 { 487 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 488 PetscErrorCode ierr; 489 PetscInt nstash,reallocs; 490 InsertMode addv; 491 492 PetscFunctionBegin; 493 if (baij->donotstash || mat->nooffprocentries) { 494 PetscFunctionReturn(0); 495 } 496 497 /* make sure all processors are either in INSERTMODE or ADDMODE */ 498 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 499 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 500 mat->insertmode = addv; /* in case this processor had no cache */ 501 502 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 503 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 504 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 505 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 506 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 507 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 514 { 515 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 516 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 517 PetscErrorCode ierr; 518 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 519 PetscInt *row,*col; 520 PetscBool other_disassembled; 521 PetscMPIInt n; 522 PetscBool r1,r2,r3; 523 MatScalar *val; 524 InsertMode addv = mat->insertmode; 525 526 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 527 PetscFunctionBegin; 528 529 if (!baij->donotstash && !mat->nooffprocentries) { 530 while (1) { 531 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 532 if (!flg) break; 533 534 for (i=0; i<n;) { 535 /* Now identify the consecutive vals belonging to the same row */ 536 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 537 if (j < n) ncols = j-i; 538 else ncols = n-i; 539 /* Now assemble all these values with a single function call */ 540 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 541 i = j; 542 } 543 } 544 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 545 /* Now process the block-stash. Since the values are stashed column-oriented, 546 set the roworiented flag to column oriented, and after MatSetValues() 547 restore the original flags */ 548 r1 = baij->roworiented; 549 r2 = a->roworiented; 550 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 551 baij->roworiented = PETSC_FALSE; 552 a->roworiented = PETSC_FALSE; 553 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 554 while (1) { 555 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 556 if (!flg) break; 557 558 for (i=0; i<n;) { 559 /* Now identify the consecutive vals belonging to the same row */ 560 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 561 if (j < n) ncols = j-i; 562 else ncols = n-i; 563 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 564 i = j; 565 } 566 } 567 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 568 baij->roworiented = r1; 569 a->roworiented = r2; 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 595 596 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 597 baij->rowvalues = 0; 598 599 PetscFunctionReturn(0); 600 } 601 602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 606 { 607 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 608 PetscErrorCode ierr; 609 PetscInt bs = mat->rmap->bs; 610 PetscMPIInt size = baij->size,rank = baij->rank; 611 PetscBool iascii,isdraw; 612 PetscViewer sviewer; 613 PetscViewerFormat format; 614 615 PetscFunctionBegin; 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 617 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 618 if (iascii) { 619 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 621 MatInfo info; 622 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 623 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscBool isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 671 672 /* copy over the A part */ 673 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 675 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 676 677 for (i=0; i<mbs; i++) { 678 rvals[0] = bs*(baij->rstartbs + i); 679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 680 for (j=ai[i]; j<ai[i+1]; j++) { 681 col = (baij->cstartbs+aj[j])*bs; 682 for (k=0; k<bs; k++) { 683 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 684 col++; a += bs; 685 } 686 } 687 } 688 /* copy over the B part */ 689 Bloc = (Mat_SeqBAIJ*)baij->B->data; 690 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 691 for (i=0; i<mbs; i++) { 692 693 rvals[0] = bs*(baij->rstartbs + i); 694 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 695 for (j=ai[i]; j<ai[i+1]; j++) { 696 col = baij->garray[aj[j]]*bs; 697 for (k=0; k<bs; k++) { 698 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 699 col++; a += bs; 700 } 701 } 702 } 703 ierr = PetscFree(rvals);CHKERRQ(ierr); 704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 /* 707 Everyone has to call to draw the matrix since the graphics waits are 708 synchronized across all processors that share the PetscDraw object 709 */ 710 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 713 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 714 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 715 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 716 } 717 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 ierr = MatDestroy(&A);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatView_MPISBAIJ" 725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 726 { 727 PetscErrorCode ierr; 728 PetscBool iascii,isdraw,issocket,isbinary; 729 730 PetscFunctionBegin; 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 735 if (iascii || isdraw || issocket || isbinary) { 736 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 737 } else { 738 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatDestroy_MPISBAIJ" 745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 #if defined(PETSC_USE_LOG) 752 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 753 #endif 754 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 755 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 756 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 757 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 758 #if defined (PETSC_USE_CTABLE) 759 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 760 #else 761 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 762 #endif 763 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 764 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 765 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 767 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 768 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 771 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 772 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 773 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 774 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 776 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 777 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 778 #if defined(PETSC_USE_REAL_MAT_SINGLE) 779 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 780 #endif 781 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 782 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 784 ierr = PetscFree(mat->data);CHKERRQ(ierr); 785 786 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 808 /* diagonal part */ 809 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 810 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 811 812 /* subdiagonal part */ 813 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 814 815 /* copy x into the vec slvec0 */ 816 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 819 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 820 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 /* supperdiagonal part */ 826 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMult_MPISBAIJ" 832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 833 { 834 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 835 PetscErrorCode ierr; 836 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 837 PetscScalar *x,*from; 838 839 PetscFunctionBegin; 840 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 841 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 842 843 /* diagonal part */ 844 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 845 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 846 847 /* subdiagonal part */ 848 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 849 850 /* copy x into the vec slvec0 */ 851 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 854 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* supperdiagonal part */ 861 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 868 { 869 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscInt nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 876 877 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 878 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 879 880 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 /* do diagonal part */ 882 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 883 /* do supperdiagonal part */ 884 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 886 /* do subdiagonal part */ 887 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 888 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 889 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt mbs=a->mbs,bs=A->rmap->bs; 901 PetscScalar *x,*from,zero=0.0; 902 903 PetscFunctionBegin; 904 /* 905 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 906 PetscSynchronizedFlush(((PetscObject)A)->comm); 907 */ 908 /* diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 910 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 911 912 /* subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 914 915 /* copy x into the vec slvec0 */ 916 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 920 921 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 925 /* supperdiagonal part */ 926 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 /* do diagonal part */ 941 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 942 /* do supperdiagonal part */ 943 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 945 946 /* do subdiagonal part */ 947 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 948 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 949 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This only works correctly for square matrices where the subblock A->A is the 956 diagonal block 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1172 info->nz_used = irecv[0]; 1173 info->nz_allocated = irecv[1]; 1174 info->nz_unneeded = irecv[2]; 1175 info->memory = irecv[3]; 1176 info->mallocs = irecv[4]; 1177 } else { 1178 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1179 } 1180 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1181 info->fill_ratio_needed = 0; 1182 info->factor_mallocs = 0; 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1189 { 1190 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1191 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 switch (op) { 1196 case MAT_NEW_NONZERO_LOCATIONS: 1197 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1198 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1199 case MAT_KEEP_NONZERO_PATTERN: 1200 case MAT_NEW_NONZERO_LOCATION_ERR: 1201 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1203 break; 1204 case MAT_ROW_ORIENTED: 1205 a->roworiented = flg; 1206 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1208 break; 1209 case MAT_NEW_DIAGONALS: 1210 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1211 break; 1212 case MAT_IGNORE_OFF_PROC_ENTRIES: 1213 a->donotstash = flg; 1214 break; 1215 case MAT_USE_HASH_TABLE: 1216 a->ht_flag = flg; 1217 break; 1218 case MAT_HERMITIAN: 1219 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1220 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1221 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1222 break; 1223 case MAT_SPD: 1224 A->spd_set = PETSC_TRUE; 1225 A->spd = flg; 1226 if (flg) { 1227 A->symmetric = PETSC_TRUE; 1228 A->structurally_symmetric = PETSC_TRUE; 1229 A->symmetric_set = PETSC_TRUE; 1230 A->structurally_symmetric_set = PETSC_TRUE; 1231 } 1232 break; 1233 case MAT_SYMMETRIC: 1234 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1235 break; 1236 case MAT_STRUCTURALLY_SYMMETRIC: 1237 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1238 break; 1239 case MAT_SYMMETRY_ETERNAL: 1240 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1241 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1242 break; 1243 case MAT_IGNORE_LOWER_TRIANGULAR: 1244 aA->ignore_ltriangular = flg; 1245 break; 1246 case MAT_ERROR_LOWER_TRIANGULAR: 1247 aA->ignore_ltriangular = flg; 1248 break; 1249 case MAT_GETROW_UPPERTRIANGULAR: 1250 aA->getrow_utriangular = flg; 1251 break; 1252 default: 1253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1260 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1261 { 1262 PetscErrorCode ierr; 1263 PetscFunctionBegin; 1264 if (MAT_INITIAL_MATRIX || *B != A) { 1265 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1272 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1273 { 1274 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1275 Mat a=baij->A, b=baij->B; 1276 PetscErrorCode ierr; 1277 PetscInt nv,m,n; 1278 PetscBool flg; 1279 1280 PetscFunctionBegin; 1281 if (ll != rr){ 1282 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1283 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1284 } 1285 if (!ll) PetscFunctionReturn(0); 1286 1287 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1288 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1289 1290 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1291 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1292 1293 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 1295 /* left diagonalscale the off-diagonal part */ 1296 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1297 1298 /* scale the diagonal part */ 1299 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1300 1301 /* right diagonalscale the off-diagonal part */ 1302 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1309 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1310 { 1311 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatEqual_MPISBAIJ" 1323 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1324 { 1325 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1326 Mat a,b,c,d; 1327 PetscBool flg; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 a = matA->A; b = matA->B; 1332 c = matB->A; d = matB->B; 1333 1334 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1335 if (flg) { 1336 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1337 } 1338 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCopy_MPISBAIJ" 1344 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1345 { 1346 PetscErrorCode ierr; 1347 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1348 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1349 1350 PetscFunctionBegin; 1351 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1352 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1353 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1354 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1355 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1356 } else { 1357 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1358 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1359 } 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1365 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1366 { 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1377 { 1378 PetscErrorCode ierr; 1379 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1380 PetscBLASInt bnz,one=1; 1381 Mat_SeqSBAIJ *xa,*ya; 1382 Mat_SeqBAIJ *xb,*yb; 1383 1384 PetscFunctionBegin; 1385 if (str == SAME_NONZERO_PATTERN) { 1386 PetscScalar alpha = a; 1387 xa = (Mat_SeqSBAIJ *)xx->A->data; 1388 ya = (Mat_SeqSBAIJ *)yy->A->data; 1389 bnz = PetscBLASIntCast(xa->nz); 1390 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1391 xb = (Mat_SeqBAIJ *)xx->B->data; 1392 yb = (Mat_SeqBAIJ *)yy->B->data; 1393 bnz = PetscBLASIntCast(xb->nz); 1394 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1395 } else { 1396 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1397 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1398 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1406 { 1407 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1408 PetscInt rbs,cbs; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1413 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1414 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1416 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1417 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1424 { 1425 PetscErrorCode ierr; 1426 PetscInt i; 1427 PetscBool flg; 1428 1429 PetscFunctionBegin; 1430 for (i=0; i<n; i++) { 1431 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1432 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1433 } 1434 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 1439 /* -------------------------------------------------------------------*/ 1440 static struct _MatOps MatOps_Values = { 1441 MatSetValues_MPISBAIJ, 1442 MatGetRow_MPISBAIJ, 1443 MatRestoreRow_MPISBAIJ, 1444 MatMult_MPISBAIJ, 1445 /* 4*/ MatMultAdd_MPISBAIJ, 1446 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1447 MatMultAdd_MPISBAIJ, 1448 0, 1449 0, 1450 0, 1451 /*10*/ 0, 1452 0, 1453 0, 1454 MatSOR_MPISBAIJ, 1455 MatTranspose_MPISBAIJ, 1456 /*15*/ MatGetInfo_MPISBAIJ, 1457 MatEqual_MPISBAIJ, 1458 MatGetDiagonal_MPISBAIJ, 1459 MatDiagonalScale_MPISBAIJ, 1460 MatNorm_MPISBAIJ, 1461 /*20*/ MatAssemblyBegin_MPISBAIJ, 1462 MatAssemblyEnd_MPISBAIJ, 1463 MatSetOption_MPISBAIJ, 1464 MatZeroEntries_MPISBAIJ, 1465 /*24*/ 0, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1471 0, 1472 0, 1473 0, 1474 0, 1475 /*34*/ MatDuplicate_MPISBAIJ, 1476 0, 1477 0, 1478 0, 1479 0, 1480 /*39*/ MatAXPY_MPISBAIJ, 1481 MatGetSubMatrices_MPISBAIJ, 1482 MatIncreaseOverlap_MPISBAIJ, 1483 MatGetValues_MPISBAIJ, 1484 MatCopy_MPISBAIJ, 1485 /*44*/ 0, 1486 MatScale_MPISBAIJ, 1487 0, 1488 0, 1489 0, 1490 /*49*/ MatSetBlockSize_MPISBAIJ, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*54*/ 0, 1496 0, 1497 MatSetUnfactored_MPISBAIJ, 1498 0, 1499 MatSetValuesBlocked_MPISBAIJ, 1500 /*59*/ 0, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*64*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*74*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*79*/ 0, 1521 0, 1522 0, 1523 0, 1524 MatLoad_MPISBAIJ, 1525 /*84*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*89*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*94*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*99*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*104*/0, 1546 MatRealPart_MPISBAIJ, 1547 MatImaginaryPart_MPISBAIJ, 1548 MatGetRowUpperTriangular_MPISBAIJ, 1549 MatRestoreRowUpperTriangular_MPISBAIJ, 1550 /*109*/0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*114*/0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*119*/0, 1561 0, 1562 0, 1563 0 1564 }; 1565 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1570 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1571 { 1572 PetscFunctionBegin; 1573 *a = ((Mat_MPISBAIJ *)A->data)->A; 1574 PetscFunctionReturn(0); 1575 } 1576 EXTERN_C_END 1577 1578 EXTERN_C_BEGIN 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1581 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1582 { 1583 Mat_MPISBAIJ *b; 1584 PetscErrorCode ierr; 1585 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1586 1587 PetscFunctionBegin; 1588 if (bs < 0){ 1589 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1590 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1591 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1592 bs = PetscAbs(bs); 1593 } 1594 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1595 bs = newbs; 1596 1597 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1598 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1599 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1600 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1601 1602 B->rmap->bs = B->cmap->bs = bs; 1603 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1604 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1605 1606 if (d_nnz) { 1607 for (i=0; i<B->rmap->n/bs; i++) { 1608 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1609 } 1610 } 1611 if (o_nnz) { 1612 for (i=0; i<B->rmap->n/bs; i++) { 1613 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1614 } 1615 } 1616 1617 b = (Mat_MPISBAIJ*)B->data; 1618 mbs = B->rmap->n/bs; 1619 Mbs = B->rmap->N/bs; 1620 if (mbs*bs != B->rmap->n) { 1621 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1622 } 1623 1624 B->rmap->bs = bs; 1625 b->bs2 = bs*bs; 1626 b->mbs = mbs; 1627 b->nbs = mbs; 1628 b->Mbs = Mbs; 1629 b->Nbs = Mbs; 1630 1631 for (i=0; i<=b->size; i++) { 1632 b->rangebs[i] = B->rmap->range[i]/bs; 1633 } 1634 b->rstartbs = B->rmap->rstart/bs; 1635 b->rendbs = B->rmap->rend/bs; 1636 1637 b->cstartbs = B->cmap->rstart/bs; 1638 b->cendbs = B->cmap->rend/bs; 1639 1640 if (!B->preallocated) { 1641 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1642 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1643 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1644 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1645 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1646 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1647 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1648 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1649 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1650 } 1651 1652 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1653 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1654 B->preallocated = PETSC_TRUE; 1655 PetscFunctionReturn(0); 1656 } 1657 EXTERN_C_END 1658 1659 EXTERN_C_BEGIN 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1662 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1663 { 1664 PetscInt m,rstart,cstart,cend; 1665 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1666 const PetscInt *JJ=0; 1667 PetscScalar *values=0; 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 1672 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1673 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1674 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1675 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1676 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1677 m = B->rmap->n/bs; 1678 rstart = B->rmap->rstart/bs; 1679 cstart = B->cmap->rstart/bs; 1680 cend = B->cmap->rend/bs; 1681 1682 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1683 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1684 for (i=0; i<m; i++) { 1685 nz = ii[i+1] - ii[i]; 1686 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1687 nz_max = PetscMax(nz_max,nz); 1688 JJ = jj + ii[i]; 1689 for (j=0; j<nz; j++) { 1690 if (*JJ >= cstart) break; 1691 JJ++; 1692 } 1693 d = 0; 1694 for (; j<nz; j++) { 1695 if (*JJ++ >= cend) break; 1696 d++; 1697 } 1698 d_nnz[i] = d; 1699 o_nnz[i] = nz - d; 1700 } 1701 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1702 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1703 1704 values = (PetscScalar*)V; 1705 if (!values) { 1706 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1707 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1708 } 1709 for (i=0; i<m; i++) { 1710 PetscInt row = i + rstart; 1711 PetscInt ncols = ii[i+1] - ii[i]; 1712 const PetscInt *icols = jj + ii[i]; 1713 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1714 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 1717 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1718 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 EXTERN_C_BEGIN 1726 #if defined(PETSC_HAVE_MUMPS) 1727 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1728 #endif 1729 #if defined(PETSC_HAVE_SPOOLES) 1730 extern PetscErrorCode MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1731 #endif 1732 #if defined(PETSC_HAVE_PASTIX) 1733 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1734 #endif 1735 EXTERN_C_END 1736 1737 /*MC 1738 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1739 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1740 the matrix is stored. 1741 1742 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1743 can call MatSetOption(Mat, MAT_HERMITIAN); 1744 1745 Options Database Keys: 1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1747 1748 Level: beginner 1749 1750 .seealso: MatCreateMPISBAIJ 1751 M*/ 1752 1753 EXTERN_C_BEGIN 1754 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*); 1755 EXTERN_C_END 1756 1757 EXTERN_C_BEGIN 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatCreate_MPISBAIJ" 1760 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1761 { 1762 Mat_MPISBAIJ *b; 1763 PetscErrorCode ierr; 1764 PetscBool flg; 1765 1766 PetscFunctionBegin; 1767 1768 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1769 B->data = (void*)b; 1770 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1771 1772 B->ops->destroy = MatDestroy_MPISBAIJ; 1773 B->ops->view = MatView_MPISBAIJ; 1774 B->assembled = PETSC_FALSE; 1775 1776 B->insertmode = NOT_SET_VALUES; 1777 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1778 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1779 1780 /* build local table of row and column ownerships */ 1781 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1782 1783 /* build cache for off array entries formed */ 1784 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1785 b->donotstash = PETSC_FALSE; 1786 b->colmap = PETSC_NULL; 1787 b->garray = PETSC_NULL; 1788 b->roworiented = PETSC_TRUE; 1789 1790 /* stuff used in block assembly */ 1791 b->barray = 0; 1792 1793 /* stuff used for matrix vector multiply */ 1794 b->lvec = 0; 1795 b->Mvctx = 0; 1796 b->slvec0 = 0; 1797 b->slvec0b = 0; 1798 b->slvec1 = 0; 1799 b->slvec1a = 0; 1800 b->slvec1b = 0; 1801 b->sMvctx = 0; 1802 1803 /* stuff for MatGetRow() */ 1804 b->rowindices = 0; 1805 b->rowvalues = 0; 1806 b->getrowactive = PETSC_FALSE; 1807 1808 /* hash table stuff */ 1809 b->ht = 0; 1810 b->hd = 0; 1811 b->ht_size = 0; 1812 b->ht_flag = PETSC_FALSE; 1813 b->ht_fact = 0; 1814 b->ht_total_ct = 0; 1815 b->ht_insert_ct = 0; 1816 1817 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1818 b->ijonly = PETSC_FALSE; 1819 1820 b->in_loc = 0; 1821 b->v_loc = 0; 1822 b->n_loc = 0; 1823 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1824 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1825 if (flg) { 1826 PetscReal fact = 1.39; 1827 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1829 if (fact <= 1.0) fact = 1.39; 1830 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1831 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1832 } 1833 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1834 1835 #if defined(PETSC_HAVE_PASTIX) 1836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1837 "MatGetFactor_mpisbaij_pastix", 1838 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1839 #endif 1840 #if defined(PETSC_HAVE_MUMPS) 1841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1842 "MatGetFactor_sbaij_mumps", 1843 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1844 #endif 1845 #if defined(PETSC_HAVE_SPOOLES) 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1847 "MatGetFactor_mpisbaij_spooles", 1848 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1849 #endif 1850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1851 "MatStoreValues_MPISBAIJ", 1852 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1854 "MatRetrieveValues_MPISBAIJ", 1855 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1857 "MatGetDiagonalBlock_MPISBAIJ", 1858 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1860 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1861 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1863 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1864 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1865 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C", 1866 "MatConvert_MPISBAIJ_MPISBSTRM", 1867 MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1868 1869 B->symmetric = PETSC_TRUE; 1870 B->structurally_symmetric = PETSC_TRUE; 1871 B->symmetric_set = PETSC_TRUE; 1872 B->structurally_symmetric_set = PETSC_TRUE; 1873 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 EXTERN_C_END 1877 1878 /*MC 1879 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1880 1881 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1882 and MATMPISBAIJ otherwise. 1883 1884 Options Database Keys: 1885 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1886 1887 Level: beginner 1888 1889 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1890 M*/ 1891 1892 EXTERN_C_BEGIN 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatCreate_SBAIJ" 1895 PetscErrorCode MatCreate_SBAIJ(Mat A) 1896 { 1897 PetscErrorCode ierr; 1898 PetscMPIInt size; 1899 1900 PetscFunctionBegin; 1901 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1902 if (size == 1) { 1903 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1904 } else { 1905 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1906 } 1907 PetscFunctionReturn(0); 1908 } 1909 EXTERN_C_END 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1913 /*@C 1914 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1915 the user should preallocate the matrix storage by setting the parameters 1916 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1917 performance can be increased by more than a factor of 50. 1918 1919 Collective on Mat 1920 1921 Input Parameters: 1922 + A - the matrix 1923 . bs - size of blockk 1924 . d_nz - number of block nonzeros per block row in diagonal portion of local 1925 submatrix (same for all local rows) 1926 . d_nnz - array containing the number of block nonzeros in the various block rows 1927 in the upper triangular and diagonal part of the in diagonal portion of the local 1928 (possibly different for each block row) or PETSC_NULL. You must leave room 1929 for the diagonal entry even if it is zero. 1930 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1931 submatrix (same for all local rows). 1932 - o_nnz - array containing the number of nonzeros in the various block rows of the 1933 off-diagonal portion of the local submatrix (possibly different for 1934 each block row) or PETSC_NULL. 1935 1936 1937 Options Database Keys: 1938 . -mat_no_unroll - uses code that does not unroll the loops in the 1939 block calculations (much slower) 1940 . -mat_block_size - size of the blocks to use 1941 1942 Notes: 1943 1944 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1945 than it must be used on all processors that share the object for that argument. 1946 1947 If the *_nnz parameter is given then the *_nz parameter is ignored 1948 1949 Storage Information: 1950 For a square global matrix we define each processor's diagonal portion 1951 to be its local rows and the corresponding columns (a square submatrix); 1952 each processor's off-diagonal portion encompasses the remainder of the 1953 local matrix (a rectangular submatrix). 1954 1955 The user can specify preallocated storage for the diagonal part of 1956 the local submatrix with either d_nz or d_nnz (not both). Set 1957 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1958 memory allocation. Likewise, specify preallocated storage for the 1959 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1960 1961 You can call MatGetInfo() to get information on how effective the preallocation was; 1962 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1963 You can also run with the option -info and look for messages with the string 1964 malloc in them to see if additional memory allocation was needed. 1965 1966 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1967 the figure below we depict these three local rows and all columns (0-11). 1968 1969 .vb 1970 0 1 2 3 4 5 6 7 8 9 10 11 1971 ------------------- 1972 row 3 | o o o d d d o o o o o o 1973 row 4 | o o o d d d o o o o o o 1974 row 5 | o o o d d d o o o o o o 1975 ------------------- 1976 .ve 1977 1978 Thus, any entries in the d locations are stored in the d (diagonal) 1979 submatrix, and any entries in the o locations are stored in the 1980 o (off-diagonal) submatrix. Note that the d matrix is stored in 1981 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1982 1983 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1984 plus the diagonal part of the d matrix, 1985 and o_nz should indicate the number of block nonzeros per row in the upper triangular 1986 part of the o matrix. 1987 In general, for PDE problems in which most nonzeros are near the diagonal, 1988 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1989 or you will get TERRIBLE performance; see the users' manual chapter on 1990 matrices. 1991 1992 Level: intermediate 1993 1994 .keywords: matrix, block, aij, compressed row, sparse, parallel 1995 1996 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1997 @*/ 1998 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1999 { 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatCreateMPISBAIJ" 2009 /*@C 2010 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2011 (block compressed row). For good matrix assembly performance 2012 the user should preallocate the matrix storage by setting the parameters 2013 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2014 performance can be increased by more than a factor of 50. 2015 2016 Collective on MPI_Comm 2017 2018 Input Parameters: 2019 + comm - MPI communicator 2020 . bs - size of blockk 2021 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2022 This value should be the same as the local size used in creating the 2023 y vector for the matrix-vector product y = Ax. 2024 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2025 This value should be the same as the local size used in creating the 2026 x vector for the matrix-vector product y = Ax. 2027 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2028 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2029 . d_nz - number of block nonzeros per block row in diagonal portion of local 2030 submatrix (same for all local rows) 2031 . d_nnz - array containing the number of block nonzeros in the various block rows 2032 in the upper triangular portion of the in diagonal portion of the local 2033 (possibly different for each block block row) or PETSC_NULL. 2034 You must leave room for the diagonal entry even if it is zero. 2035 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2036 submatrix (same for all local rows). 2037 - o_nnz - array containing the number of nonzeros in the various block rows of the 2038 off-diagonal portion of the local submatrix (possibly different for 2039 each block row) or PETSC_NULL. 2040 2041 Output Parameter: 2042 . A - the matrix 2043 2044 Options Database Keys: 2045 . -mat_no_unroll - uses code that does not unroll the loops in the 2046 block calculations (much slower) 2047 . -mat_block_size - size of the blocks to use 2048 . -mat_mpi - use the parallel matrix data structures even on one processor 2049 (defaults to using SeqBAIJ format on one processor) 2050 2051 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2052 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2053 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2054 2055 Notes: 2056 The number of rows and columns must be divisible by blocksize. 2057 This matrix type does not support complex Hermitian operation. 2058 2059 The user MUST specify either the local or global matrix dimensions 2060 (possibly both). 2061 2062 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2063 than it must be used on all processors that share the object for that argument. 2064 2065 If the *_nnz parameter is given then the *_nz parameter is ignored 2066 2067 Storage Information: 2068 For a square global matrix we define each processor's diagonal portion 2069 to be its local rows and the corresponding columns (a square submatrix); 2070 each processor's off-diagonal portion encompasses the remainder of the 2071 local matrix (a rectangular submatrix). 2072 2073 The user can specify preallocated storage for the diagonal part of 2074 the local submatrix with either d_nz or d_nnz (not both). Set 2075 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2076 memory allocation. Likewise, specify preallocated storage for the 2077 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2078 2079 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2080 the figure below we depict these three local rows and all columns (0-11). 2081 2082 .vb 2083 0 1 2 3 4 5 6 7 8 9 10 11 2084 ------------------- 2085 row 3 | o o o d d d o o o o o o 2086 row 4 | o o o d d d o o o o o o 2087 row 5 | o o o d d d o o o o o o 2088 ------------------- 2089 .ve 2090 2091 Thus, any entries in the d locations are stored in the d (diagonal) 2092 submatrix, and any entries in the o locations are stored in the 2093 o (off-diagonal) submatrix. Note that the d matrix is stored in 2094 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2095 2096 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2097 plus the diagonal part of the d matrix, 2098 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2099 In general, for PDE problems in which most nonzeros are near the diagonal, 2100 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2101 or you will get TERRIBLE performance; see the users' manual chapter on 2102 matrices. 2103 2104 Level: intermediate 2105 2106 .keywords: matrix, block, aij, compressed row, sparse, parallel 2107 2108 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2109 @*/ 2110 2111 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2112 { 2113 PetscErrorCode ierr; 2114 PetscMPIInt size; 2115 2116 PetscFunctionBegin; 2117 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2118 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2119 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2120 if (size > 1) { 2121 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2122 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2123 } else { 2124 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2125 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2126 } 2127 PetscFunctionReturn(0); 2128 } 2129 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2133 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2134 { 2135 Mat mat; 2136 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2137 PetscErrorCode ierr; 2138 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2139 PetscScalar *array; 2140 2141 PetscFunctionBegin; 2142 *newmat = 0; 2143 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2144 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2145 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2146 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2147 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2148 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2149 2150 mat->factortype = matin->factortype; 2151 mat->preallocated = PETSC_TRUE; 2152 mat->assembled = PETSC_TRUE; 2153 mat->insertmode = NOT_SET_VALUES; 2154 2155 a = (Mat_MPISBAIJ*)mat->data; 2156 a->bs2 = oldmat->bs2; 2157 a->mbs = oldmat->mbs; 2158 a->nbs = oldmat->nbs; 2159 a->Mbs = oldmat->Mbs; 2160 a->Nbs = oldmat->Nbs; 2161 2162 2163 a->size = oldmat->size; 2164 a->rank = oldmat->rank; 2165 a->donotstash = oldmat->donotstash; 2166 a->roworiented = oldmat->roworiented; 2167 a->rowindices = 0; 2168 a->rowvalues = 0; 2169 a->getrowactive = PETSC_FALSE; 2170 a->barray = 0; 2171 a->rstartbs = oldmat->rstartbs; 2172 a->rendbs = oldmat->rendbs; 2173 a->cstartbs = oldmat->cstartbs; 2174 a->cendbs = oldmat->cendbs; 2175 2176 /* hash table stuff */ 2177 a->ht = 0; 2178 a->hd = 0; 2179 a->ht_size = 0; 2180 a->ht_flag = oldmat->ht_flag; 2181 a->ht_fact = oldmat->ht_fact; 2182 a->ht_total_ct = 0; 2183 a->ht_insert_ct = 0; 2184 2185 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2186 if (oldmat->colmap) { 2187 #if defined (PETSC_USE_CTABLE) 2188 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2189 #else 2190 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2191 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2192 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2193 #endif 2194 } else a->colmap = 0; 2195 2196 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2197 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2198 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2199 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2200 } else a->garray = 0; 2201 2202 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2203 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2204 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2205 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2206 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2207 2208 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2209 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2210 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2211 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2212 2213 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2214 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2215 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2216 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2217 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2218 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2219 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2220 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2221 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2222 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2223 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2224 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2225 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2226 2227 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2228 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2229 a->sMvctx = oldmat->sMvctx; 2230 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2231 2232 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2233 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2234 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2235 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2236 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2237 *newmat = mat; 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatLoad_MPISBAIJ" 2243 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2244 { 2245 PetscErrorCode ierr; 2246 PetscInt i,nz,j,rstart,rend; 2247 PetscScalar *vals,*buf; 2248 MPI_Comm comm = ((PetscObject)viewer)->comm; 2249 MPI_Status status; 2250 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2251 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2252 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2253 PetscInt bs=1,Mbs,mbs,extra_rows; 2254 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2255 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2256 int fd; 2257 2258 PetscFunctionBegin; 2259 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2260 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2261 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2262 2263 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2264 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2265 if (!rank) { 2266 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2267 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2268 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2269 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2270 } 2271 2272 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2273 2274 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2275 M = header[1]; N = header[2]; 2276 2277 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2278 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2279 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2280 2281 /* If global sizes are set, check if they are consistent with that given in the file */ 2282 if (sizesset) { 2283 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2284 } 2285 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2286 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2287 2288 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2289 2290 /* 2291 This code adds extra rows to make sure the number of rows is 2292 divisible by the blocksize 2293 */ 2294 Mbs = M/bs; 2295 extra_rows = bs - M + bs*(Mbs); 2296 if (extra_rows == bs) extra_rows = 0; 2297 else Mbs++; 2298 if (extra_rows &&!rank) { 2299 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2300 } 2301 2302 /* determine ownership of all rows */ 2303 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2304 mbs = Mbs/size + ((Mbs % size) > rank); 2305 m = mbs*bs; 2306 } else { /* User Set */ 2307 m = newmat->rmap->n; 2308 mbs = m/bs; 2309 } 2310 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2311 mmbs = PetscMPIIntCast(mbs); 2312 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2313 rowners[0] = 0; 2314 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2315 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2316 rstart = rowners[rank]; 2317 rend = rowners[rank+1]; 2318 2319 /* distribute row lengths to all processors */ 2320 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2321 if (!rank) { 2322 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2323 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2324 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2325 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2326 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2327 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2328 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2329 } else { 2330 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2331 } 2332 2333 if (!rank) { /* procs[0] */ 2334 /* calculate the number of nonzeros on each processor */ 2335 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2336 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2337 for (i=0; i<size; i++) { 2338 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2339 procsnz[i] += rowlengths[j]; 2340 } 2341 } 2342 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2343 2344 /* determine max buffer needed and allocate it */ 2345 maxnz = 0; 2346 for (i=0; i<size; i++) { 2347 maxnz = PetscMax(maxnz,procsnz[i]); 2348 } 2349 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2350 2351 /* read in my part of the matrix column indices */ 2352 nz = procsnz[0]; 2353 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2354 mycols = ibuf; 2355 if (size == 1) nz -= extra_rows; 2356 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2357 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2358 2359 /* read in every ones (except the last) and ship off */ 2360 for (i=1; i<size-1; i++) { 2361 nz = procsnz[i]; 2362 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2363 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2364 } 2365 /* read in the stuff for the last proc */ 2366 if (size != 1) { 2367 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2368 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2369 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2370 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2371 } 2372 ierr = PetscFree(cols);CHKERRQ(ierr); 2373 } else { /* procs[i], i>0 */ 2374 /* determine buffer space needed for message */ 2375 nz = 0; 2376 for (i=0; i<m; i++) { 2377 nz += locrowlens[i]; 2378 } 2379 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2380 mycols = ibuf; 2381 /* receive message of column indices*/ 2382 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2383 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2384 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2385 } 2386 2387 /* loop over local rows, determining number of off diagonal entries */ 2388 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2389 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2390 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2391 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2392 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2393 rowcount = 0; 2394 nzcount = 0; 2395 for (i=0; i<mbs; i++) { 2396 dcount = 0; 2397 odcount = 0; 2398 for (j=0; j<bs; j++) { 2399 kmax = locrowlens[rowcount]; 2400 for (k=0; k<kmax; k++) { 2401 tmp = mycols[nzcount++]/bs; /* block col. index */ 2402 if (!mask[tmp]) { 2403 mask[tmp] = 1; 2404 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2405 else masked1[dcount++] = tmp; /* entry in diag portion */ 2406 } 2407 } 2408 rowcount++; 2409 } 2410 2411 dlens[i] = dcount; /* d_nzz[i] */ 2412 odlens[i] = odcount; /* o_nzz[i] */ 2413 2414 /* zero out the mask elements we set */ 2415 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2416 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2417 } 2418 if (!sizesset) { 2419 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2420 } 2421 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2422 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2423 2424 if (!rank) { 2425 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2426 /* read in my part of the matrix numerical values */ 2427 nz = procsnz[0]; 2428 vals = buf; 2429 mycols = ibuf; 2430 if (size == 1) nz -= extra_rows; 2431 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2432 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2433 2434 /* insert into matrix */ 2435 jj = rstart*bs; 2436 for (i=0; i<m; i++) { 2437 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2438 mycols += locrowlens[i]; 2439 vals += locrowlens[i]; 2440 jj++; 2441 } 2442 2443 /* read in other processors (except the last one) and ship out */ 2444 for (i=1; i<size-1; i++) { 2445 nz = procsnz[i]; 2446 vals = buf; 2447 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2448 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2449 } 2450 /* the last proc */ 2451 if (size != 1){ 2452 nz = procsnz[i] - extra_rows; 2453 vals = buf; 2454 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2455 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2456 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2457 } 2458 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2459 2460 } else { 2461 /* receive numeric values */ 2462 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2463 2464 /* receive message of values*/ 2465 vals = buf; 2466 mycols = ibuf; 2467 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2468 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2469 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2470 2471 /* insert into matrix */ 2472 jj = rstart*bs; 2473 for (i=0; i<m; i++) { 2474 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2475 mycols += locrowlens[i]; 2476 vals += locrowlens[i]; 2477 jj++; 2478 } 2479 } 2480 2481 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2482 ierr = PetscFree(buf);CHKERRQ(ierr); 2483 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2484 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2485 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2486 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2487 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2494 /*XXXXX@ 2495 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2496 2497 Input Parameters: 2498 . mat - the matrix 2499 . fact - factor 2500 2501 Not Collective on Mat, each process can have a different hash factor 2502 2503 Level: advanced 2504 2505 Notes: 2506 This can also be set by the command line option: -mat_use_hash_table fact 2507 2508 .keywords: matrix, hashtable, factor, HT 2509 2510 .seealso: MatSetOption() 2511 @XXXXX*/ 2512 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2516 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2517 { 2518 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2519 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2520 PetscReal atmp; 2521 PetscReal *work,*svalues,*rvalues; 2522 PetscErrorCode ierr; 2523 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2524 PetscMPIInt rank,size; 2525 PetscInt *rowners_bs,dest,count,source; 2526 PetscScalar *va; 2527 MatScalar *ba; 2528 MPI_Status stat; 2529 2530 PetscFunctionBegin; 2531 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2532 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2533 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2534 2535 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2536 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2537 2538 bs = A->rmap->bs; 2539 mbs = a->mbs; 2540 Mbs = a->Mbs; 2541 ba = b->a; 2542 bi = b->i; 2543 bj = b->j; 2544 2545 /* find ownerships */ 2546 rowners_bs = A->rmap->range; 2547 2548 /* each proc creates an array to be distributed */ 2549 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2550 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2551 2552 /* row_max for B */ 2553 if (rank != size-1){ 2554 for (i=0; i<mbs; i++) { 2555 ncols = bi[1] - bi[0]; bi++; 2556 brow = bs*i; 2557 for (j=0; j<ncols; j++){ 2558 bcol = bs*(*bj); 2559 for (kcol=0; kcol<bs; kcol++){ 2560 col = bcol + kcol; /* local col index */ 2561 col += rowners_bs[rank+1]; /* global col index */ 2562 for (krow=0; krow<bs; krow++){ 2563 atmp = PetscAbsScalar(*ba); ba++; 2564 row = brow + krow; /* local row index */ 2565 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2566 if (work[col] < atmp) work[col] = atmp; 2567 } 2568 } 2569 bj++; 2570 } 2571 } 2572 2573 /* send values to its owners */ 2574 for (dest=rank+1; dest<size; dest++){ 2575 svalues = work + rowners_bs[dest]; 2576 count = rowners_bs[dest+1]-rowners_bs[dest]; 2577 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2578 } 2579 } 2580 2581 /* receive values */ 2582 if (rank){ 2583 rvalues = work; 2584 count = rowners_bs[rank+1]-rowners_bs[rank]; 2585 for (source=0; source<rank; source++){ 2586 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2587 /* process values */ 2588 for (i=0; i<count; i++){ 2589 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2590 } 2591 } 2592 } 2593 2594 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2595 ierr = PetscFree(work);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 #undef __FUNCT__ 2600 #define __FUNCT__ "MatSOR_MPISBAIJ" 2601 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2602 { 2603 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2604 PetscErrorCode ierr; 2605 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2606 PetscScalar *x,*ptr,*from; 2607 Vec bb1; 2608 const PetscScalar *b; 2609 2610 PetscFunctionBegin; 2611 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2612 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2613 2614 if (flag == SOR_APPLY_UPPER) { 2615 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2620 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2621 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2622 its--; 2623 } 2624 2625 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2626 while (its--){ 2627 2628 /* lower triangular part: slvec0b = - B^T*xx */ 2629 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2630 2631 /* copy xx into slvec0a */ 2632 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2633 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2634 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2635 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2636 2637 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2638 2639 /* copy bb into slvec1a */ 2640 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2641 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2642 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2643 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2644 2645 /* set slvec1b = 0 */ 2646 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2647 2648 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2649 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2650 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2651 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2652 2653 /* upper triangular part: bb1 = bb1 - B*x */ 2654 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2655 2656 /* local diagonal sweep */ 2657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2658 } 2659 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2660 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2661 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2662 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2663 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2664 } else if (flag & SOR_EISENSTAT) { 2665 Vec xx1; 2666 PetscBool hasop; 2667 const PetscScalar *diag; 2668 PetscScalar *sl,scale = (omega - 2.0)/omega; 2669 PetscInt i,n; 2670 2671 if (!mat->xx1) { 2672 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2673 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2674 } 2675 xx1 = mat->xx1; 2676 bb1 = mat->bb1; 2677 2678 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2679 2680 if (!mat->diag) { 2681 /* this is wrong for same matrix with new nonzero values */ 2682 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2683 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2684 } 2685 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2686 2687 if (hasop) { 2688 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2689 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2690 } else { 2691 /* 2692 These two lines are replaced by code that may be a bit faster for a good compiler 2693 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2694 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2695 */ 2696 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2697 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2698 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2699 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2700 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2701 if (omega == 1.0) { 2702 for (i=0; i<n; i++) { 2703 sl[i] = b[i] - diag[i]*x[i]; 2704 } 2705 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2706 } else { 2707 for (i=0; i<n; i++) { 2708 sl[i] = b[i] + scale*diag[i]*x[i]; 2709 } 2710 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2711 } 2712 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2713 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2714 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2715 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2716 } 2717 2718 /* multiply off-diagonal portion of matrix */ 2719 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2720 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2721 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2723 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2724 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2725 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2726 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2727 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2728 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2729 2730 /* local sweep */ 2731 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2732 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2733 } else { 2734 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2735 } 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2741 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2742 { 2743 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2744 PetscErrorCode ierr; 2745 Vec lvec1,bb1; 2746 2747 PetscFunctionBegin; 2748 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2749 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2750 2751 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2752 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2753 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2754 its--; 2755 } 2756 2757 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2758 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2759 while (its--){ 2760 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2761 2762 /* lower diagonal part: bb1 = bb - B^T*xx */ 2763 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2764 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2765 2766 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2767 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2768 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2769 2770 /* upper diagonal part: bb1 = bb1 - B*x */ 2771 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2772 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2773 2774 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2775 2776 /* diagonal sweep */ 2777 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2778 } 2779 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2780 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2781 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2782 PetscFunctionReturn(0); 2783 } 2784 2785 #undef __FUNCT__ 2786 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2787 /*@ 2788 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2789 CSR format the local rows. 2790 2791 Collective on MPI_Comm 2792 2793 Input Parameters: 2794 + comm - MPI communicator 2795 . bs - the block size, only a block size of 1 is supported 2796 . m - number of local rows (Cannot be PETSC_DECIDE) 2797 . n - This value should be the same as the local size used in creating the 2798 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2799 calculated if N is given) For square matrices n is almost always m. 2800 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2801 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2802 . i - row indices 2803 . j - column indices 2804 - a - matrix values 2805 2806 Output Parameter: 2807 . mat - the matrix 2808 2809 Level: intermediate 2810 2811 Notes: 2812 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2813 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2814 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2815 2816 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2817 2818 .keywords: matrix, aij, compressed row, sparse, parallel 2819 2820 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2821 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2822 @*/ 2823 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2824 { 2825 PetscErrorCode ierr; 2826 2827 2828 PetscFunctionBegin; 2829 if (i[0]) { 2830 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2831 } 2832 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2833 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2834 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2835 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2836 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2837 PetscFunctionReturn(0); 2838 } 2839 2840 2841 #undef __FUNCT__ 2842 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2843 /*@C 2844 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2845 (the default parallel PETSc format). 2846 2847 Collective on MPI_Comm 2848 2849 Input Parameters: 2850 + A - the matrix 2851 . bs - the block size 2852 . i - the indices into j for the start of each local row (starts with zero) 2853 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2854 - v - optional values in the matrix 2855 2856 Level: developer 2857 2858 .keywords: matrix, aij, compressed row, sparse, parallel 2859 2860 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2861 @*/ 2862 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2863 { 2864 PetscErrorCode ierr; 2865 2866 PetscFunctionBegin; 2867 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2868 PetscFunctionReturn(0); 2869 } 2870 2871 2872