1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash){ 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs){ 189 if (a->ignore_ltriangular){ 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 200 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 201 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 202 } else if (in[j] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 205 #endif 206 else { /* off-diag entry (B) */ 207 if (mat->was_assembled) { 208 if (!baij->colmap) { 209 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 210 } 211 #if defined (PETSC_USE_CTABLE) 212 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 213 col = col - 1; 214 #else 215 col = baij->colmap[in[j]/bs] - 1; 216 #endif 217 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 218 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 219 col = in[j]; 220 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 221 B = baij->B; 222 b = (Mat_SeqBAIJ*)(B)->data; 223 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 224 ba=b->a; 225 } else col += in[j]%bs; 226 } else col = in[j]; 227 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = (m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 349 if (!baij->donotstash) { 350 if (roworiented) { 351 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 352 } else { 353 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 354 } 355 } 356 } 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatGetValues_MPISBAIJ" 363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 364 { 365 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 366 PetscErrorCode ierr; 367 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 368 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 369 370 PetscFunctionBegin; 371 for (i=0; i<m; i++) { 372 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 373 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 374 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 375 row = idxm[i] - bsrstart; 376 for (j=0; j<n; j++) { 377 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 378 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 379 if (idxn[j] >= bscstart && idxn[j] < bscend){ 380 col = idxn[j] - bscstart; 381 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 382 } else { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 388 data --; 389 #else 390 data = baij->colmap[idxn[j]/bs]-1; 391 #endif 392 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 393 else { 394 col = data + idxn[j]%bs; 395 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 396 } 397 } 398 } 399 } else { 400 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatNorm_MPISBAIJ" 408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 409 { 410 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscReal sum[2],*lnorm2; 413 414 PetscFunctionBegin; 415 if (baij->size == 1) { 416 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 417 } else { 418 if (type == NORM_FROBENIUS) { 419 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 420 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 421 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 422 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 423 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 424 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 425 *norm = sqrt(sum[0] + 2*sum[1]); 426 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 427 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 428 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 429 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 430 PetscReal *rsum,*rsum2,vabs; 431 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 432 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 433 MatScalar *v; 434 435 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 436 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 437 /* Amat */ 438 v = amat->a; jj = amat->j; 439 for (brow=0; brow<mbs; brow++) { 440 grow = bs*(rstart + brow); 441 nz = amat->i[brow+1] - amat->i[brow]; 442 for (bcol=0; bcol<nz; bcol++){ 443 gcol = bs*(rstart + *jj); jj++; 444 for (col=0; col<bs; col++){ 445 for (row=0; row<bs; row++){ 446 vabs = PetscAbsScalar(*v); v++; 447 rsum[gcol+col] += vabs; 448 /* non-diagonal block */ 449 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 450 } 451 } 452 } 453 } 454 /* Bmat */ 455 v = bmat->a; jj = bmat->j; 456 for (brow=0; brow<mbs; brow++) { 457 grow = bs*(rstart + brow); 458 nz = bmat->i[brow+1] - bmat->i[brow]; 459 for (bcol=0; bcol<nz; bcol++){ 460 gcol = bs*garray[*jj]; jj++; 461 for (col=0; col<bs; col++){ 462 for (row=0; row<bs; row++){ 463 vabs = PetscAbsScalar(*v); v++; 464 rsum[gcol+col] += vabs; 465 rsum[grow+row] += vabs; 466 } 467 } 468 } 469 } 470 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 471 *norm = 0.0; 472 for (col=0; col<mat->cmap->N; col++) { 473 if (rsum2[col] > *norm) *norm = rsum2[col]; 474 } 475 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 476 } else { 477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 486 { 487 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 488 PetscErrorCode ierr; 489 PetscInt nstash,reallocs; 490 InsertMode addv; 491 492 PetscFunctionBegin; 493 if (baij->donotstash || mat->nooffprocentries) { 494 PetscFunctionReturn(0); 495 } 496 497 /* make sure all processors are either in INSERTMODE or ADDMODE */ 498 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 499 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 500 mat->insertmode = addv; /* in case this processor had no cache */ 501 502 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 503 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 504 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 505 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 506 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 507 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 514 { 515 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 516 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 517 PetscErrorCode ierr; 518 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 519 PetscInt *row,*col; 520 PetscBool other_disassembled; 521 PetscMPIInt n; 522 PetscBool r1,r2,r3; 523 MatScalar *val; 524 InsertMode addv = mat->insertmode; 525 526 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 527 PetscFunctionBegin; 528 529 if (!baij->donotstash && !mat->nooffprocentries) { 530 while (1) { 531 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 532 if (!flg) break; 533 534 for (i=0; i<n;) { 535 /* Now identify the consecutive vals belonging to the same row */ 536 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 537 if (j < n) ncols = j-i; 538 else ncols = n-i; 539 /* Now assemble all these values with a single function call */ 540 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 541 i = j; 542 } 543 } 544 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 545 /* Now process the block-stash. Since the values are stashed column-oriented, 546 set the roworiented flag to column oriented, and after MatSetValues() 547 restore the original flags */ 548 r1 = baij->roworiented; 549 r2 = a->roworiented; 550 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 551 baij->roworiented = PETSC_FALSE; 552 a->roworiented = PETSC_FALSE; 553 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 554 while (1) { 555 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 556 if (!flg) break; 557 558 for (i=0; i<n;) { 559 /* Now identify the consecutive vals belonging to the same row */ 560 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 561 if (j < n) ncols = j-i; 562 else ncols = n-i; 563 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 564 i = j; 565 } 566 } 567 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 568 baij->roworiented = r1; 569 a->roworiented = r2; 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 595 596 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 597 baij->rowvalues = 0; 598 599 PetscFunctionReturn(0); 600 } 601 602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 606 { 607 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 608 PetscErrorCode ierr; 609 PetscInt bs = mat->rmap->bs; 610 PetscMPIInt size = baij->size,rank = baij->rank; 611 PetscBool iascii,isdraw; 612 PetscViewer sviewer; 613 PetscViewerFormat format; 614 615 PetscFunctionBegin; 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 617 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 618 if (iascii) { 619 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 621 MatInfo info; 622 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 623 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscBool isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 671 672 /* copy over the A part */ 673 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 675 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 676 677 for (i=0; i<mbs; i++) { 678 rvals[0] = bs*(baij->rstartbs + i); 679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 680 for (j=ai[i]; j<ai[i+1]; j++) { 681 col = (baij->cstartbs+aj[j])*bs; 682 for (k=0; k<bs; k++) { 683 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 684 col++; a += bs; 685 } 686 } 687 } 688 /* copy over the B part */ 689 Bloc = (Mat_SeqBAIJ*)baij->B->data; 690 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 691 for (i=0; i<mbs; i++) { 692 693 rvals[0] = bs*(baij->rstartbs + i); 694 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 695 for (j=ai[i]; j<ai[i+1]; j++) { 696 col = baij->garray[aj[j]]*bs; 697 for (k=0; k<bs; k++) { 698 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 699 col++; a += bs; 700 } 701 } 702 } 703 ierr = PetscFree(rvals);CHKERRQ(ierr); 704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 /* 707 Everyone has to call to draw the matrix since the graphics waits are 708 synchronized across all processors that share the PetscDraw object 709 */ 710 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 713 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 714 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 715 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 716 } 717 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 ierr = MatDestroy(A);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatView_MPISBAIJ" 725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 726 { 727 PetscErrorCode ierr; 728 PetscBool iascii,isdraw,issocket,isbinary; 729 730 PetscFunctionBegin; 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 735 if (iascii || isdraw || issocket || isbinary) { 736 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 737 } else { 738 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatDestroy_MPISBAIJ" 745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 #if defined(PETSC_USE_LOG) 752 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 753 #endif 754 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 755 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 756 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 757 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 758 #if defined (PETSC_USE_CTABLE) 759 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 760 #else 761 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 762 #endif 763 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 764 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 765 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 766 if (baij->slvec0) { 767 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 768 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 769 } 770 if (baij->slvec1) { 771 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 772 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 773 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 774 } 775 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 776 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 777 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 778 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 779 if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);} 780 if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);} 781 if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);} 782 #if defined(PETSC_USE_REAL_MAT_SINGLE) 783 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 784 #endif 785 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 786 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 788 ierr = PetscFree(baij);CHKERRQ(ierr); 789 790 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 801 { 802 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 803 PetscErrorCode ierr; 804 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 805 PetscScalar *x,*from; 806 807 PetscFunctionBegin; 808 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 809 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 810 811 /* diagonal part */ 812 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 813 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 814 815 /* subdiagonal part */ 816 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 817 818 /* copy x into the vec slvec0 */ 819 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 820 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 821 822 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 823 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 824 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 825 826 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 /* supperdiagonal part */ 829 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "MatMult_MPISBAIJ" 835 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 836 { 837 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 838 PetscErrorCode ierr; 839 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 840 PetscScalar *x,*from; 841 842 PetscFunctionBegin; 843 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 844 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 845 846 /* diagonal part */ 847 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 848 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 849 850 /* subdiagonal part */ 851 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 852 853 /* copy x into the vec slvec0 */ 854 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 856 857 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 858 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 859 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 860 861 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 /* supperdiagonal part */ 864 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 871 { 872 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 873 PetscErrorCode ierr; 874 PetscInt nt; 875 876 PetscFunctionBegin; 877 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 878 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 879 880 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 881 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 882 883 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 884 /* do diagonal part */ 885 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 886 /* do supperdiagonal part */ 887 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 889 /* do subdiagonal part */ 890 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 891 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 892 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 893 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 899 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 900 { 901 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 902 PetscErrorCode ierr; 903 PetscInt mbs=a->mbs,bs=A->rmap->bs; 904 PetscScalar *x,*from,zero=0.0; 905 906 PetscFunctionBegin; 907 /* 908 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 909 PetscSynchronizedFlush(((PetscObject)A)->comm); 910 */ 911 /* diagonal part */ 912 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 913 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 914 915 /* subdiagonal part */ 916 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 917 918 /* copy x into the vec slvec0 */ 919 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 920 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 921 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 922 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 923 924 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 926 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 928 /* supperdiagonal part */ 929 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 930 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 936 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 937 { 938 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 /* do diagonal part */ 944 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 945 /* do supperdiagonal part */ 946 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 948 949 /* do subdiagonal part */ 950 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 951 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 952 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 953 954 PetscFunctionReturn(0); 955 } 956 957 /* 958 This only works correctly for square matrices where the subblock A->A is the 959 diagonal block 960 */ 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 963 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 964 { 965 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 966 PetscErrorCode ierr; 967 968 PetscFunctionBegin; 969 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 970 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatScale_MPISBAIJ" 976 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 977 { 978 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 979 PetscErrorCode ierr; 980 981 PetscFunctionBegin; 982 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 983 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatGetRow_MPISBAIJ" 989 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 990 { 991 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 992 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 993 PetscErrorCode ierr; 994 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 995 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 996 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 997 998 PetscFunctionBegin; 999 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1000 mat->getrowactive = PETSC_TRUE; 1001 1002 if (!mat->rowvalues && (idx || v)) { 1003 /* 1004 allocate enough space to hold information from the longest row. 1005 */ 1006 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1007 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1008 PetscInt max = 1,mbs = mat->mbs,tmp; 1009 for (i=0; i<mbs; i++) { 1010 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1011 if (max < tmp) { max = tmp; } 1012 } 1013 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1014 } 1015 1016 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1017 lrow = row - brstart; /* local row index */ 1018 1019 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1020 if (!v) {pvA = 0; pvB = 0;} 1021 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1022 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1023 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1024 nztot = nzA + nzB; 1025 1026 cmap = mat->garray; 1027 if (v || idx) { 1028 if (nztot) { 1029 /* Sort by increasing column numbers, assuming A and B already sorted */ 1030 PetscInt imark = -1; 1031 if (v) { 1032 *v = v_p = mat->rowvalues; 1033 for (i=0; i<nzB; i++) { 1034 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1035 else break; 1036 } 1037 imark = i; 1038 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1039 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1040 } 1041 if (idx) { 1042 *idx = idx_p = mat->rowindices; 1043 if (imark > -1) { 1044 for (i=0; i<imark; i++) { 1045 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1046 } 1047 } else { 1048 for (i=0; i<nzB; i++) { 1049 if (cmap[cworkB[i]/bs] < cstart) 1050 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1051 else break; 1052 } 1053 imark = i; 1054 } 1055 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1056 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1057 } 1058 } else { 1059 if (idx) *idx = 0; 1060 if (v) *v = 0; 1061 } 1062 } 1063 *nz = nztot; 1064 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1071 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1072 { 1073 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1074 1075 PetscFunctionBegin; 1076 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1077 baij->getrowactive = PETSC_FALSE; 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1083 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1084 { 1085 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1086 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1087 1088 PetscFunctionBegin; 1089 aA->getrow_utriangular = PETSC_TRUE; 1090 PetscFunctionReturn(0); 1091 } 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1094 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1095 { 1096 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1097 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1098 1099 PetscFunctionBegin; 1100 aA->getrow_utriangular = PETSC_FALSE; 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1106 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1107 { 1108 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1113 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1119 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1120 { 1121 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1126 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1132 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1133 { 1134 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1139 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1145 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1146 { 1147 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1148 Mat A = a->A,B = a->B; 1149 PetscErrorCode ierr; 1150 PetscReal isend[5],irecv[5]; 1151 1152 PetscFunctionBegin; 1153 info->block_size = (PetscReal)matin->rmap->bs; 1154 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1156 isend[3] = info->memory; isend[4] = info->mallocs; 1157 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1158 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1159 isend[3] += info->memory; isend[4] += info->mallocs; 1160 if (flag == MAT_LOCAL) { 1161 info->nz_used = isend[0]; 1162 info->nz_allocated = isend[1]; 1163 info->nz_unneeded = isend[2]; 1164 info->memory = isend[3]; 1165 info->mallocs = isend[4]; 1166 } else if (flag == MAT_GLOBAL_MAX) { 1167 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1168 info->nz_used = irecv[0]; 1169 info->nz_allocated = irecv[1]; 1170 info->nz_unneeded = irecv[2]; 1171 info->memory = irecv[3]; 1172 info->mallocs = irecv[4]; 1173 } else if (flag == MAT_GLOBAL_SUM) { 1174 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1175 info->nz_used = irecv[0]; 1176 info->nz_allocated = irecv[1]; 1177 info->nz_unneeded = irecv[2]; 1178 info->memory = irecv[3]; 1179 info->mallocs = irecv[4]; 1180 } else { 1181 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1182 } 1183 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1184 info->fill_ratio_needed = 0; 1185 info->factor_mallocs = 0; 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1191 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1192 { 1193 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1194 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1195 PetscErrorCode ierr; 1196 1197 PetscFunctionBegin; 1198 switch (op) { 1199 case MAT_NEW_NONZERO_LOCATIONS: 1200 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1201 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1202 case MAT_KEEP_NONZERO_PATTERN: 1203 case MAT_NEW_NONZERO_LOCATION_ERR: 1204 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1205 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1206 break; 1207 case MAT_ROW_ORIENTED: 1208 a->roworiented = flg; 1209 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1210 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1211 break; 1212 case MAT_NEW_DIAGONALS: 1213 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1214 break; 1215 case MAT_IGNORE_OFF_PROC_ENTRIES: 1216 a->donotstash = flg; 1217 break; 1218 case MAT_USE_HASH_TABLE: 1219 a->ht_flag = flg; 1220 break; 1221 case MAT_HERMITIAN: 1222 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1223 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1224 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1225 break; 1226 case MAT_SPD: 1227 A->spd_set = PETSC_TRUE; 1228 A->spd = flg; 1229 if (flg) { 1230 A->symmetric = PETSC_TRUE; 1231 A->structurally_symmetric = PETSC_TRUE; 1232 A->symmetric_set = PETSC_TRUE; 1233 A->structurally_symmetric_set = PETSC_TRUE; 1234 } 1235 break; 1236 case MAT_SYMMETRIC: 1237 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1238 break; 1239 case MAT_STRUCTURALLY_SYMMETRIC: 1240 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1241 break; 1242 case MAT_SYMMETRY_ETERNAL: 1243 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1244 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1245 break; 1246 case MAT_IGNORE_LOWER_TRIANGULAR: 1247 aA->ignore_ltriangular = flg; 1248 break; 1249 case MAT_ERROR_LOWER_TRIANGULAR: 1250 aA->ignore_ltriangular = flg; 1251 break; 1252 case MAT_GETROW_UPPERTRIANGULAR: 1253 aA->getrow_utriangular = flg; 1254 break; 1255 default: 1256 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1263 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1264 { 1265 PetscErrorCode ierr; 1266 PetscFunctionBegin; 1267 if (MAT_INITIAL_MATRIX || *B != A) { 1268 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1269 } 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1275 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1276 { 1277 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1278 Mat a=baij->A, b=baij->B; 1279 PetscErrorCode ierr; 1280 PetscInt nv,m,n; 1281 PetscBool flg; 1282 1283 PetscFunctionBegin; 1284 if (ll != rr){ 1285 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1286 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1287 } 1288 if (!ll) PetscFunctionReturn(0); 1289 1290 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1291 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1292 1293 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1294 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1295 1296 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1297 1298 /* left diagonalscale the off-diagonal part */ 1299 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1300 1301 /* scale the diagonal part */ 1302 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1303 1304 /* right diagonalscale the off-diagonal part */ 1305 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1306 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1313 { 1314 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatEqual_MPISBAIJ" 1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1327 { 1328 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1329 Mat a,b,c,d; 1330 PetscBool flg; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 a = matA->A; b = matA->B; 1335 c = matB->A; d = matB->B; 1336 1337 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1338 if (flg) { 1339 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1340 } 1341 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatCopy_MPISBAIJ" 1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1348 { 1349 PetscErrorCode ierr; 1350 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1351 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1352 1353 PetscFunctionBegin; 1354 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1355 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1356 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1357 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1358 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1359 } else { 1360 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1361 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1368 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1369 { 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1379 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1380 { 1381 PetscErrorCode ierr; 1382 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1383 PetscBLASInt bnz,one=1; 1384 Mat_SeqSBAIJ *xa,*ya; 1385 Mat_SeqBAIJ *xb,*yb; 1386 1387 PetscFunctionBegin; 1388 if (str == SAME_NONZERO_PATTERN) { 1389 PetscScalar alpha = a; 1390 xa = (Mat_SeqSBAIJ *)xx->A->data; 1391 ya = (Mat_SeqSBAIJ *)yy->A->data; 1392 bnz = PetscBLASIntCast(xa->nz); 1393 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1394 xb = (Mat_SeqBAIJ *)xx->B->data; 1395 yb = (Mat_SeqBAIJ *)yy->B->data; 1396 bnz = PetscBLASIntCast(xb->nz); 1397 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1398 } else { 1399 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1400 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1401 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1408 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1409 { 1410 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1411 PetscInt rbs,cbs; 1412 PetscErrorCode ierr; 1413 1414 PetscFunctionBegin; 1415 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1416 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1417 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1418 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1419 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1420 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1426 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1427 { 1428 PetscErrorCode ierr; 1429 PetscInt i; 1430 PetscBool flg; 1431 1432 PetscFunctionBegin; 1433 for (i=0; i<n; i++) { 1434 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1435 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1436 } 1437 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 1442 /* -------------------------------------------------------------------*/ 1443 static struct _MatOps MatOps_Values = { 1444 MatSetValues_MPISBAIJ, 1445 MatGetRow_MPISBAIJ, 1446 MatRestoreRow_MPISBAIJ, 1447 MatMult_MPISBAIJ, 1448 /* 4*/ MatMultAdd_MPISBAIJ, 1449 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1450 MatMultAdd_MPISBAIJ, 1451 0, 1452 0, 1453 0, 1454 /*10*/ 0, 1455 0, 1456 0, 1457 MatSOR_MPISBAIJ, 1458 MatTranspose_MPISBAIJ, 1459 /*15*/ MatGetInfo_MPISBAIJ, 1460 MatEqual_MPISBAIJ, 1461 MatGetDiagonal_MPISBAIJ, 1462 MatDiagonalScale_MPISBAIJ, 1463 MatNorm_MPISBAIJ, 1464 /*20*/ MatAssemblyBegin_MPISBAIJ, 1465 MatAssemblyEnd_MPISBAIJ, 1466 MatSetOption_MPISBAIJ, 1467 MatZeroEntries_MPISBAIJ, 1468 /*24*/ 0, 1469 0, 1470 0, 1471 0, 1472 0, 1473 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1474 0, 1475 0, 1476 0, 1477 0, 1478 /*34*/ MatDuplicate_MPISBAIJ, 1479 0, 1480 0, 1481 0, 1482 0, 1483 /*39*/ MatAXPY_MPISBAIJ, 1484 MatGetSubMatrices_MPISBAIJ, 1485 MatIncreaseOverlap_MPISBAIJ, 1486 MatGetValues_MPISBAIJ, 1487 MatCopy_MPISBAIJ, 1488 /*44*/ 0, 1489 MatScale_MPISBAIJ, 1490 0, 1491 0, 1492 0, 1493 /*49*/ MatSetBlockSize_MPISBAIJ, 1494 0, 1495 0, 1496 0, 1497 0, 1498 /*54*/ 0, 1499 0, 1500 MatSetUnfactored_MPISBAIJ, 1501 0, 1502 MatSetValuesBlocked_MPISBAIJ, 1503 /*59*/ 0, 1504 0, 1505 0, 1506 0, 1507 0, 1508 /*64*/ 0, 1509 0, 1510 0, 1511 0, 1512 0, 1513 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1514 0, 1515 0, 1516 0, 1517 0, 1518 /*74*/ 0, 1519 0, 1520 0, 1521 0, 1522 0, 1523 /*79*/ 0, 1524 0, 1525 0, 1526 0, 1527 MatLoad_MPISBAIJ, 1528 /*84*/ 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /*89*/ 0, 1534 0, 1535 0, 1536 0, 1537 0, 1538 /*94*/ 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 /*99*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /*104*/0, 1549 MatRealPart_MPISBAIJ, 1550 MatImaginaryPart_MPISBAIJ, 1551 MatGetRowUpperTriangular_MPISBAIJ, 1552 MatRestoreRowUpperTriangular_MPISBAIJ, 1553 /*109*/0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*114*/0, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*119*/0, 1564 0, 1565 0, 1566 0 1567 }; 1568 1569 1570 EXTERN_C_BEGIN 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1573 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 1574 { 1575 PetscFunctionBegin; 1576 *a = ((Mat_MPISBAIJ *)A->data)->A; 1577 *iscopy = PETSC_FALSE; 1578 PetscFunctionReturn(0); 1579 } 1580 EXTERN_C_END 1581 1582 EXTERN_C_BEGIN 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1585 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1586 { 1587 Mat_MPISBAIJ *b; 1588 PetscErrorCode ierr; 1589 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1590 1591 PetscFunctionBegin; 1592 if (bs < 0){ 1593 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1594 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1595 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1596 bs = PetscAbs(bs); 1597 } 1598 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1599 bs = newbs; 1600 1601 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1602 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1603 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1604 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1605 1606 B->rmap->bs = B->cmap->bs = bs; 1607 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1608 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1609 1610 if (d_nnz) { 1611 for (i=0; i<B->rmap->n/bs; i++) { 1612 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1613 } 1614 } 1615 if (o_nnz) { 1616 for (i=0; i<B->rmap->n/bs; i++) { 1617 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1618 } 1619 } 1620 1621 b = (Mat_MPISBAIJ*)B->data; 1622 mbs = B->rmap->n/bs; 1623 Mbs = B->rmap->N/bs; 1624 if (mbs*bs != B->rmap->n) { 1625 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1626 } 1627 1628 B->rmap->bs = bs; 1629 b->bs2 = bs*bs; 1630 b->mbs = mbs; 1631 b->nbs = mbs; 1632 b->Mbs = Mbs; 1633 b->Nbs = Mbs; 1634 1635 for (i=0; i<=b->size; i++) { 1636 b->rangebs[i] = B->rmap->range[i]/bs; 1637 } 1638 b->rstartbs = B->rmap->rstart/bs; 1639 b->rendbs = B->rmap->rend/bs; 1640 1641 b->cstartbs = B->cmap->rstart/bs; 1642 b->cendbs = B->cmap->rend/bs; 1643 1644 if (!B->preallocated) { 1645 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1646 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1647 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1648 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1649 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1650 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1651 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1652 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1653 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1654 } 1655 1656 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1657 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1658 B->preallocated = PETSC_TRUE; 1659 PetscFunctionReturn(0); 1660 } 1661 EXTERN_C_END 1662 1663 EXTERN_C_BEGIN 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1666 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1667 { 1668 PetscInt m,rstart,cstart,cend; 1669 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1670 const PetscInt *JJ=0; 1671 PetscScalar *values=0; 1672 PetscErrorCode ierr; 1673 1674 PetscFunctionBegin; 1675 1676 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1677 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1678 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1679 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1680 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1681 m = B->rmap->n/bs; 1682 rstart = B->rmap->rstart/bs; 1683 cstart = B->cmap->rstart/bs; 1684 cend = B->cmap->rend/bs; 1685 1686 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1687 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1688 for (i=0; i<m; i++) { 1689 nz = ii[i+1] - ii[i]; 1690 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1691 nz_max = PetscMax(nz_max,nz); 1692 JJ = jj + ii[i]; 1693 for (j=0; j<nz; j++) { 1694 if (*JJ >= cstart) break; 1695 JJ++; 1696 } 1697 d = 0; 1698 for (; j<nz; j++) { 1699 if (*JJ++ >= cend) break; 1700 d++; 1701 } 1702 d_nnz[i] = d; 1703 o_nnz[i] = nz - d; 1704 } 1705 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1706 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1707 1708 values = (PetscScalar*)V; 1709 if (!values) { 1710 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1711 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1712 } 1713 for (i=0; i<m; i++) { 1714 PetscInt row = i + rstart; 1715 PetscInt ncols = ii[i+1] - ii[i]; 1716 const PetscInt *icols = jj + ii[i]; 1717 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1718 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1719 } 1720 1721 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1722 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 1725 PetscFunctionReturn(0); 1726 } 1727 EXTERN_C_END 1728 1729 EXTERN_C_BEGIN 1730 #if defined(PETSC_HAVE_MUMPS) 1731 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1732 #endif 1733 #if defined(PETSC_HAVE_SPOOLES) 1734 extern PetscErrorCode MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1735 #endif 1736 #if defined(PETSC_HAVE_PASTIX) 1737 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1738 #endif 1739 EXTERN_C_END 1740 1741 /*MC 1742 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1743 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1744 1745 Options Database Keys: 1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1747 1748 Level: beginner 1749 1750 .seealso: MatCreateMPISBAIJ 1751 M*/ 1752 1753 EXTERN_C_BEGIN 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatCreate_MPISBAIJ" 1756 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1757 { 1758 Mat_MPISBAIJ *b; 1759 PetscErrorCode ierr; 1760 PetscBool flg; 1761 1762 PetscFunctionBegin; 1763 1764 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1765 B->data = (void*)b; 1766 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1767 1768 B->ops->destroy = MatDestroy_MPISBAIJ; 1769 B->ops->view = MatView_MPISBAIJ; 1770 B->assembled = PETSC_FALSE; 1771 1772 B->insertmode = NOT_SET_VALUES; 1773 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1774 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1775 1776 /* build local table of row and column ownerships */ 1777 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1778 1779 /* build cache for off array entries formed */ 1780 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1781 b->donotstash = PETSC_FALSE; 1782 b->colmap = PETSC_NULL; 1783 b->garray = PETSC_NULL; 1784 b->roworiented = PETSC_TRUE; 1785 1786 /* stuff used in block assembly */ 1787 b->barray = 0; 1788 1789 /* stuff used for matrix vector multiply */ 1790 b->lvec = 0; 1791 b->Mvctx = 0; 1792 b->slvec0 = 0; 1793 b->slvec0b = 0; 1794 b->slvec1 = 0; 1795 b->slvec1a = 0; 1796 b->slvec1b = 0; 1797 b->sMvctx = 0; 1798 1799 /* stuff for MatGetRow() */ 1800 b->rowindices = 0; 1801 b->rowvalues = 0; 1802 b->getrowactive = PETSC_FALSE; 1803 1804 /* hash table stuff */ 1805 b->ht = 0; 1806 b->hd = 0; 1807 b->ht_size = 0; 1808 b->ht_flag = PETSC_FALSE; 1809 b->ht_fact = 0; 1810 b->ht_total_ct = 0; 1811 b->ht_insert_ct = 0; 1812 1813 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1814 b->ijonly = PETSC_FALSE; 1815 1816 b->in_loc = 0; 1817 b->v_loc = 0; 1818 b->n_loc = 0; 1819 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1820 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1821 if (flg) { 1822 PetscReal fact = 1.39; 1823 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1824 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1825 if (fact <= 1.0) fact = 1.39; 1826 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1827 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1828 } 1829 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1830 1831 #if defined(PETSC_HAVE_PASTIX) 1832 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1833 "MatGetFactor_mpisbaij_pastix", 1834 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1835 #endif 1836 #if defined(PETSC_HAVE_MUMPS) 1837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1838 "MatGetFactor_sbaij_mumps", 1839 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1840 #endif 1841 #if defined(PETSC_HAVE_SPOOLES) 1842 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1843 "MatGetFactor_mpisbaij_spooles", 1844 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1845 #endif 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1847 "MatStoreValues_MPISBAIJ", 1848 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1850 "MatRetrieveValues_MPISBAIJ", 1851 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1853 "MatGetDiagonalBlock_MPISBAIJ", 1854 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1856 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1857 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1858 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1859 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1860 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1861 B->symmetric = PETSC_TRUE; 1862 B->structurally_symmetric = PETSC_TRUE; 1863 B->symmetric_set = PETSC_TRUE; 1864 B->structurally_symmetric_set = PETSC_TRUE; 1865 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 EXTERN_C_END 1869 1870 /*MC 1871 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1872 1873 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1874 and MATMPISBAIJ otherwise. 1875 1876 Options Database Keys: 1877 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1878 1879 Level: beginner 1880 1881 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1882 M*/ 1883 1884 EXTERN_C_BEGIN 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatCreate_SBAIJ" 1887 PetscErrorCode MatCreate_SBAIJ(Mat A) 1888 { 1889 PetscErrorCode ierr; 1890 PetscMPIInt size; 1891 1892 PetscFunctionBegin; 1893 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1894 if (size == 1) { 1895 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1896 } else { 1897 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 EXTERN_C_END 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1905 /*@C 1906 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1907 the user should preallocate the matrix storage by setting the parameters 1908 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1909 performance can be increased by more than a factor of 50. 1910 1911 Collective on Mat 1912 1913 Input Parameters: 1914 + A - the matrix 1915 . bs - size of blockk 1916 . d_nz - number of block nonzeros per block row in diagonal portion of local 1917 submatrix (same for all local rows) 1918 . d_nnz - array containing the number of block nonzeros in the various block rows 1919 in the upper triangular and diagonal part of the in diagonal portion of the local 1920 (possibly different for each block row) or PETSC_NULL. You must leave room 1921 for the diagonal entry even if it is zero. 1922 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1923 submatrix (same for all local rows). 1924 - o_nnz - array containing the number of nonzeros in the various block rows of the 1925 off-diagonal portion of the local submatrix (possibly different for 1926 each block row) or PETSC_NULL. 1927 1928 1929 Options Database Keys: 1930 . -mat_no_unroll - uses code that does not unroll the loops in the 1931 block calculations (much slower) 1932 . -mat_block_size - size of the blocks to use 1933 1934 Notes: 1935 1936 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1937 than it must be used on all processors that share the object for that argument. 1938 1939 If the *_nnz parameter is given then the *_nz parameter is ignored 1940 1941 Storage Information: 1942 For a square global matrix we define each processor's diagonal portion 1943 to be its local rows and the corresponding columns (a square submatrix); 1944 each processor's off-diagonal portion encompasses the remainder of the 1945 local matrix (a rectangular submatrix). 1946 1947 The user can specify preallocated storage for the diagonal part of 1948 the local submatrix with either d_nz or d_nnz (not both). Set 1949 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1950 memory allocation. Likewise, specify preallocated storage for the 1951 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1952 1953 You can call MatGetInfo() to get information on how effective the preallocation was; 1954 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1955 You can also run with the option -info and look for messages with the string 1956 malloc in them to see if additional memory allocation was needed. 1957 1958 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1959 the figure below we depict these three local rows and all columns (0-11). 1960 1961 .vb 1962 0 1 2 3 4 5 6 7 8 9 10 11 1963 ------------------- 1964 row 3 | o o o d d d o o o o o o 1965 row 4 | o o o d d d o o o o o o 1966 row 5 | o o o d d d o o o o o o 1967 ------------------- 1968 .ve 1969 1970 Thus, any entries in the d locations are stored in the d (diagonal) 1971 submatrix, and any entries in the o locations are stored in the 1972 o (off-diagonal) submatrix. Note that the d matrix is stored in 1973 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1974 1975 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1976 plus the diagonal part of the d matrix, 1977 and o_nz should indicate the number of block nonzeros per row in the upper triangular 1978 part of the o matrix. 1979 In general, for PDE problems in which most nonzeros are near the diagonal, 1980 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1981 or you will get TERRIBLE performance; see the users' manual chapter on 1982 matrices. 1983 1984 Level: intermediate 1985 1986 .keywords: matrix, block, aij, compressed row, sparse, parallel 1987 1988 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1989 @*/ 1990 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1991 { 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatCreateMPISBAIJ" 2001 /*@C 2002 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2003 (block compressed row). For good matrix assembly performance 2004 the user should preallocate the matrix storage by setting the parameters 2005 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2006 performance can be increased by more than a factor of 50. 2007 2008 Collective on MPI_Comm 2009 2010 Input Parameters: 2011 + comm - MPI communicator 2012 . bs - size of blockk 2013 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2014 This value should be the same as the local size used in creating the 2015 y vector for the matrix-vector product y = Ax. 2016 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2017 This value should be the same as the local size used in creating the 2018 x vector for the matrix-vector product y = Ax. 2019 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2020 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2021 . d_nz - number of block nonzeros per block row in diagonal portion of local 2022 submatrix (same for all local rows) 2023 . d_nnz - array containing the number of block nonzeros in the various block rows 2024 in the upper triangular portion of the in diagonal portion of the local 2025 (possibly different for each block block row) or PETSC_NULL. 2026 You must leave room for the diagonal entry even if it is zero. 2027 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2028 submatrix (same for all local rows). 2029 - o_nnz - array containing the number of nonzeros in the various block rows of the 2030 off-diagonal portion of the local submatrix (possibly different for 2031 each block row) or PETSC_NULL. 2032 2033 Output Parameter: 2034 . A - the matrix 2035 2036 Options Database Keys: 2037 . -mat_no_unroll - uses code that does not unroll the loops in the 2038 block calculations (much slower) 2039 . -mat_block_size - size of the blocks to use 2040 . -mat_mpi - use the parallel matrix data structures even on one processor 2041 (defaults to using SeqBAIJ format on one processor) 2042 2043 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2044 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2045 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2046 2047 Notes: 2048 The number of rows and columns must be divisible by blocksize. 2049 This matrix type does not support complex Hermitian operation. 2050 2051 The user MUST specify either the local or global matrix dimensions 2052 (possibly both). 2053 2054 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2055 than it must be used on all processors that share the object for that argument. 2056 2057 If the *_nnz parameter is given then the *_nz parameter is ignored 2058 2059 Storage Information: 2060 For a square global matrix we define each processor's diagonal portion 2061 to be its local rows and the corresponding columns (a square submatrix); 2062 each processor's off-diagonal portion encompasses the remainder of the 2063 local matrix (a rectangular submatrix). 2064 2065 The user can specify preallocated storage for the diagonal part of 2066 the local submatrix with either d_nz or d_nnz (not both). Set 2067 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2068 memory allocation. Likewise, specify preallocated storage for the 2069 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2070 2071 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2072 the figure below we depict these three local rows and all columns (0-11). 2073 2074 .vb 2075 0 1 2 3 4 5 6 7 8 9 10 11 2076 ------------------- 2077 row 3 | o o o d d d o o o o o o 2078 row 4 | o o o d d d o o o o o o 2079 row 5 | o o o d d d o o o o o o 2080 ------------------- 2081 .ve 2082 2083 Thus, any entries in the d locations are stored in the d (diagonal) 2084 submatrix, and any entries in the o locations are stored in the 2085 o (off-diagonal) submatrix. Note that the d matrix is stored in 2086 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2087 2088 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2089 plus the diagonal part of the d matrix, 2090 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2091 In general, for PDE problems in which most nonzeros are near the diagonal, 2092 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2093 or you will get TERRIBLE performance; see the users' manual chapter on 2094 matrices. 2095 2096 Level: intermediate 2097 2098 .keywords: matrix, block, aij, compressed row, sparse, parallel 2099 2100 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2101 @*/ 2102 2103 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2104 { 2105 PetscErrorCode ierr; 2106 PetscMPIInt size; 2107 2108 PetscFunctionBegin; 2109 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2110 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2111 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2112 if (size > 1) { 2113 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2114 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2115 } else { 2116 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2117 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2125 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2126 { 2127 Mat mat; 2128 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2129 PetscErrorCode ierr; 2130 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2131 PetscScalar *array; 2132 2133 PetscFunctionBegin; 2134 *newmat = 0; 2135 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2136 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2137 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2138 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2139 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2140 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2141 2142 mat->factortype = matin->factortype; 2143 mat->preallocated = PETSC_TRUE; 2144 mat->assembled = PETSC_TRUE; 2145 mat->insertmode = NOT_SET_VALUES; 2146 2147 a = (Mat_MPISBAIJ*)mat->data; 2148 a->bs2 = oldmat->bs2; 2149 a->mbs = oldmat->mbs; 2150 a->nbs = oldmat->nbs; 2151 a->Mbs = oldmat->Mbs; 2152 a->Nbs = oldmat->Nbs; 2153 2154 2155 a->size = oldmat->size; 2156 a->rank = oldmat->rank; 2157 a->donotstash = oldmat->donotstash; 2158 a->roworiented = oldmat->roworiented; 2159 a->rowindices = 0; 2160 a->rowvalues = 0; 2161 a->getrowactive = PETSC_FALSE; 2162 a->barray = 0; 2163 a->rstartbs = oldmat->rstartbs; 2164 a->rendbs = oldmat->rendbs; 2165 a->cstartbs = oldmat->cstartbs; 2166 a->cendbs = oldmat->cendbs; 2167 2168 /* hash table stuff */ 2169 a->ht = 0; 2170 a->hd = 0; 2171 a->ht_size = 0; 2172 a->ht_flag = oldmat->ht_flag; 2173 a->ht_fact = oldmat->ht_fact; 2174 a->ht_total_ct = 0; 2175 a->ht_insert_ct = 0; 2176 2177 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2178 if (oldmat->colmap) { 2179 #if defined (PETSC_USE_CTABLE) 2180 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2181 #else 2182 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2183 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2184 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2185 #endif 2186 } else a->colmap = 0; 2187 2188 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2189 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2190 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2191 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2192 } else a->garray = 0; 2193 2194 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2195 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2196 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2197 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2198 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2199 2200 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2201 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2202 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2203 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2204 2205 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2206 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2207 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2208 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2209 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2210 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2211 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2212 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2213 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2214 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2215 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2216 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2217 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2218 2219 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2220 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2221 a->sMvctx = oldmat->sMvctx; 2222 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2223 2224 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2225 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2226 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2227 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2228 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2229 *newmat = mat; 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatLoad_MPISBAIJ" 2235 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2236 { 2237 PetscErrorCode ierr; 2238 PetscInt i,nz,j,rstart,rend; 2239 PetscScalar *vals,*buf; 2240 MPI_Comm comm = ((PetscObject)viewer)->comm; 2241 MPI_Status status; 2242 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2243 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2244 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2245 PetscInt bs=1,Mbs,mbs,extra_rows; 2246 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2247 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2248 int fd; 2249 2250 PetscFunctionBegin; 2251 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2252 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2253 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2254 2255 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2256 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2257 if (!rank) { 2258 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2259 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2260 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2261 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2262 } 2263 2264 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2265 2266 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2267 M = header[1]; N = header[2]; 2268 2269 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2270 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2271 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2272 2273 /* If global sizes are set, check if they are consistent with that given in the file */ 2274 if (sizesset) { 2275 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2276 } 2277 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2278 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2279 2280 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2281 2282 /* 2283 This code adds extra rows to make sure the number of rows is 2284 divisible by the blocksize 2285 */ 2286 Mbs = M/bs; 2287 extra_rows = bs - M + bs*(Mbs); 2288 if (extra_rows == bs) extra_rows = 0; 2289 else Mbs++; 2290 if (extra_rows &&!rank) { 2291 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2292 } 2293 2294 /* determine ownership of all rows */ 2295 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2296 mbs = Mbs/size + ((Mbs % size) > rank); 2297 m = mbs*bs; 2298 } else { /* User Set */ 2299 m = newmat->rmap->n; 2300 mbs = m/bs; 2301 } 2302 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2303 mmbs = PetscMPIIntCast(mbs); 2304 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2305 rowners[0] = 0; 2306 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2307 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2308 rstart = rowners[rank]; 2309 rend = rowners[rank+1]; 2310 2311 /* distribute row lengths to all processors */ 2312 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2313 if (!rank) { 2314 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2315 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2316 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2317 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2318 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2319 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2320 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2321 } else { 2322 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2323 } 2324 2325 if (!rank) { /* procs[0] */ 2326 /* calculate the number of nonzeros on each processor */ 2327 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2328 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2329 for (i=0; i<size; i++) { 2330 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2331 procsnz[i] += rowlengths[j]; 2332 } 2333 } 2334 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2335 2336 /* determine max buffer needed and allocate it */ 2337 maxnz = 0; 2338 for (i=0; i<size; i++) { 2339 maxnz = PetscMax(maxnz,procsnz[i]); 2340 } 2341 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2342 2343 /* read in my part of the matrix column indices */ 2344 nz = procsnz[0]; 2345 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2346 mycols = ibuf; 2347 if (size == 1) nz -= extra_rows; 2348 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2349 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2350 2351 /* read in every ones (except the last) and ship off */ 2352 for (i=1; i<size-1; i++) { 2353 nz = procsnz[i]; 2354 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2355 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2356 } 2357 /* read in the stuff for the last proc */ 2358 if (size != 1) { 2359 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2360 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2361 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2362 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2363 } 2364 ierr = PetscFree(cols);CHKERRQ(ierr); 2365 } else { /* procs[i], i>0 */ 2366 /* determine buffer space needed for message */ 2367 nz = 0; 2368 for (i=0; i<m; i++) { 2369 nz += locrowlens[i]; 2370 } 2371 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2372 mycols = ibuf; 2373 /* receive message of column indices*/ 2374 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2375 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2376 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2377 } 2378 2379 /* loop over local rows, determining number of off diagonal entries */ 2380 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2381 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2382 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2383 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2384 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2385 rowcount = 0; 2386 nzcount = 0; 2387 for (i=0; i<mbs; i++) { 2388 dcount = 0; 2389 odcount = 0; 2390 for (j=0; j<bs; j++) { 2391 kmax = locrowlens[rowcount]; 2392 for (k=0; k<kmax; k++) { 2393 tmp = mycols[nzcount++]/bs; /* block col. index */ 2394 if (!mask[tmp]) { 2395 mask[tmp] = 1; 2396 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2397 else masked1[dcount++] = tmp; /* entry in diag portion */ 2398 } 2399 } 2400 rowcount++; 2401 } 2402 2403 dlens[i] = dcount; /* d_nzz[i] */ 2404 odlens[i] = odcount; /* o_nzz[i] */ 2405 2406 /* zero out the mask elements we set */ 2407 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2408 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2409 } 2410 if (!sizesset) { 2411 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2412 } 2413 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2414 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2415 2416 if (!rank) { 2417 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2418 /* read in my part of the matrix numerical values */ 2419 nz = procsnz[0]; 2420 vals = buf; 2421 mycols = ibuf; 2422 if (size == 1) nz -= extra_rows; 2423 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2424 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2425 2426 /* insert into matrix */ 2427 jj = rstart*bs; 2428 for (i=0; i<m; i++) { 2429 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2430 mycols += locrowlens[i]; 2431 vals += locrowlens[i]; 2432 jj++; 2433 } 2434 2435 /* read in other processors (except the last one) and ship out */ 2436 for (i=1; i<size-1; i++) { 2437 nz = procsnz[i]; 2438 vals = buf; 2439 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2440 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2441 } 2442 /* the last proc */ 2443 if (size != 1){ 2444 nz = procsnz[i] - extra_rows; 2445 vals = buf; 2446 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2447 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2448 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2449 } 2450 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2451 2452 } else { 2453 /* receive numeric values */ 2454 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2455 2456 /* receive message of values*/ 2457 vals = buf; 2458 mycols = ibuf; 2459 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2460 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2461 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2462 2463 /* insert into matrix */ 2464 jj = rstart*bs; 2465 for (i=0; i<m; i++) { 2466 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2467 mycols += locrowlens[i]; 2468 vals += locrowlens[i]; 2469 jj++; 2470 } 2471 } 2472 2473 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2474 ierr = PetscFree(buf);CHKERRQ(ierr); 2475 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2476 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2477 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2478 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2479 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2480 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2486 /*XXXXX@ 2487 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2488 2489 Input Parameters: 2490 . mat - the matrix 2491 . fact - factor 2492 2493 Not Collective on Mat, each process can have a different hash factor 2494 2495 Level: advanced 2496 2497 Notes: 2498 This can also be set by the command line option: -mat_use_hash_table fact 2499 2500 .keywords: matrix, hashtable, factor, HT 2501 2502 .seealso: MatSetOption() 2503 @XXXXX*/ 2504 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2508 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2509 { 2510 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2511 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2512 PetscReal atmp; 2513 PetscReal *work,*svalues,*rvalues; 2514 PetscErrorCode ierr; 2515 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2516 PetscMPIInt rank,size; 2517 PetscInt *rowners_bs,dest,count,source; 2518 PetscScalar *va; 2519 MatScalar *ba; 2520 MPI_Status stat; 2521 2522 PetscFunctionBegin; 2523 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2524 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2525 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2526 2527 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2528 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2529 2530 bs = A->rmap->bs; 2531 mbs = a->mbs; 2532 Mbs = a->Mbs; 2533 ba = b->a; 2534 bi = b->i; 2535 bj = b->j; 2536 2537 /* find ownerships */ 2538 rowners_bs = A->rmap->range; 2539 2540 /* each proc creates an array to be distributed */ 2541 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2542 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2543 2544 /* row_max for B */ 2545 if (rank != size-1){ 2546 for (i=0; i<mbs; i++) { 2547 ncols = bi[1] - bi[0]; bi++; 2548 brow = bs*i; 2549 for (j=0; j<ncols; j++){ 2550 bcol = bs*(*bj); 2551 for (kcol=0; kcol<bs; kcol++){ 2552 col = bcol + kcol; /* local col index */ 2553 col += rowners_bs[rank+1]; /* global col index */ 2554 for (krow=0; krow<bs; krow++){ 2555 atmp = PetscAbsScalar(*ba); ba++; 2556 row = brow + krow; /* local row index */ 2557 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2558 if (work[col] < atmp) work[col] = atmp; 2559 } 2560 } 2561 bj++; 2562 } 2563 } 2564 2565 /* send values to its owners */ 2566 for (dest=rank+1; dest<size; dest++){ 2567 svalues = work + rowners_bs[dest]; 2568 count = rowners_bs[dest+1]-rowners_bs[dest]; 2569 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2570 } 2571 } 2572 2573 /* receive values */ 2574 if (rank){ 2575 rvalues = work; 2576 count = rowners_bs[rank+1]-rowners_bs[rank]; 2577 for (source=0; source<rank; source++){ 2578 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2579 /* process values */ 2580 for (i=0; i<count; i++){ 2581 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2582 } 2583 } 2584 } 2585 2586 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2587 ierr = PetscFree(work);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "MatSOR_MPISBAIJ" 2593 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2594 { 2595 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2596 PetscErrorCode ierr; 2597 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2598 PetscScalar *x,*ptr,*from; 2599 Vec bb1; 2600 const PetscScalar *b; 2601 2602 PetscFunctionBegin; 2603 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2604 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2605 2606 if (flag == SOR_APPLY_UPPER) { 2607 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2608 PetscFunctionReturn(0); 2609 } 2610 2611 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2612 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2614 its--; 2615 } 2616 2617 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2618 while (its--){ 2619 2620 /* lower triangular part: slvec0b = - B^T*xx */ 2621 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2622 2623 /* copy xx into slvec0a */ 2624 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2625 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2626 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2627 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2628 2629 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2630 2631 /* copy bb into slvec1a */ 2632 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2633 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2634 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2635 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2636 2637 /* set slvec1b = 0 */ 2638 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2639 2640 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2641 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2642 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2643 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2644 2645 /* upper triangular part: bb1 = bb1 - B*x */ 2646 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2647 2648 /* local diagonal sweep */ 2649 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2650 } 2651 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2652 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2653 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2654 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2655 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2656 } else if (flag & SOR_EISENSTAT) { 2657 Vec xx1; 2658 PetscBool hasop; 2659 const PetscScalar *diag; 2660 PetscScalar *sl,scale = (omega - 2.0)/omega; 2661 PetscInt i,n; 2662 2663 if (!mat->xx1) { 2664 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2665 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2666 } 2667 xx1 = mat->xx1; 2668 bb1 = mat->bb1; 2669 2670 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2671 2672 if (!mat->diag) { 2673 /* this is wrong for same matrix with new nonzero values */ 2674 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2675 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2676 } 2677 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2678 2679 if (hasop) { 2680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2681 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2682 } else { 2683 /* 2684 These two lines are replaced by code that may be a bit faster for a good compiler 2685 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2686 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2687 */ 2688 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2689 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2690 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2691 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2692 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2693 if (omega == 1.0) { 2694 for (i=0; i<n; i++) { 2695 sl[i] = b[i] - diag[i]*x[i]; 2696 } 2697 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2698 } else { 2699 for (i=0; i<n; i++) { 2700 sl[i] = b[i] + scale*diag[i]*x[i]; 2701 } 2702 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2703 } 2704 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2705 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2706 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2707 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2708 } 2709 2710 /* multiply off-diagonal portion of matrix */ 2711 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2712 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2713 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2714 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2715 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2716 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2717 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2718 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2721 2722 /* local sweep */ 2723 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2724 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2725 } else { 2726 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2727 } 2728 PetscFunctionReturn(0); 2729 } 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2733 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2734 { 2735 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2736 PetscErrorCode ierr; 2737 Vec lvec1,bb1; 2738 2739 PetscFunctionBegin; 2740 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2741 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2742 2743 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2744 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2745 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2746 its--; 2747 } 2748 2749 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2750 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2751 while (its--){ 2752 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2753 2754 /* lower diagonal part: bb1 = bb - B^T*xx */ 2755 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2756 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2757 2758 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2759 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2760 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2761 2762 /* upper diagonal part: bb1 = bb1 - B*x */ 2763 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2764 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2765 2766 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2767 2768 /* diagonal sweep */ 2769 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2770 } 2771 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2772 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2773 } else { 2774 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2775 } 2776 PetscFunctionReturn(0); 2777 } 2778 2779 #undef __FUNCT__ 2780 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2781 /*@ 2782 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2783 CSR format the local rows. 2784 2785 Collective on MPI_Comm 2786 2787 Input Parameters: 2788 + comm - MPI communicator 2789 . bs - the block size, only a block size of 1 is supported 2790 . m - number of local rows (Cannot be PETSC_DECIDE) 2791 . n - This value should be the same as the local size used in creating the 2792 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2793 calculated if N is given) For square matrices n is almost always m. 2794 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2795 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2796 . i - row indices 2797 . j - column indices 2798 - a - matrix values 2799 2800 Output Parameter: 2801 . mat - the matrix 2802 2803 Level: intermediate 2804 2805 Notes: 2806 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2807 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2808 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2809 2810 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2811 2812 .keywords: matrix, aij, compressed row, sparse, parallel 2813 2814 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2815 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2816 @*/ 2817 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2818 { 2819 PetscErrorCode ierr; 2820 2821 2822 PetscFunctionBegin; 2823 if (i[0]) { 2824 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2825 } 2826 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2827 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2828 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2829 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2830 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2837 /*@C 2838 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2839 (the default parallel PETSc format). 2840 2841 Collective on MPI_Comm 2842 2843 Input Parameters: 2844 + A - the matrix 2845 . bs - the block size 2846 . i - the indices into j for the start of each local row (starts with zero) 2847 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2848 - v - optional values in the matrix 2849 2850 Level: developer 2851 2852 .keywords: matrix, aij, compressed row, sparse, parallel 2853 2854 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2855 @*/ 2856 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2857 { 2858 PetscErrorCode ierr; 2859 2860 PetscFunctionBegin; 2861 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 2866