1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash){ 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs){ 189 if (a->ignore_ltriangular){ 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 200 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 201 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 202 } else if (in[j] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 205 #endif 206 else { /* off-diag entry (B) */ 207 if (mat->was_assembled) { 208 if (!baij->colmap) { 209 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 210 } 211 #if defined (PETSC_USE_CTABLE) 212 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 213 col = col - 1; 214 #else 215 col = baij->colmap[in[j]/bs] - 1; 216 #endif 217 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 218 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 219 col = in[j]; 220 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 221 B = baij->B; 222 b = (Mat_SeqBAIJ*)(B)->data; 223 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 224 ba=b->a; 225 } else col += in[j]%bs; 226 } else col = in[j]; 227 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = (m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 349 if (!baij->donotstash) { 350 if (roworiented) { 351 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 352 } else { 353 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 354 } 355 } 356 } 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatGetValues_MPISBAIJ" 363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 364 { 365 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 366 PetscErrorCode ierr; 367 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 368 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 369 370 PetscFunctionBegin; 371 for (i=0; i<m; i++) { 372 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 373 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 374 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 375 row = idxm[i] - bsrstart; 376 for (j=0; j<n; j++) { 377 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 378 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 379 if (idxn[j] >= bscstart && idxn[j] < bscend){ 380 col = idxn[j] - bscstart; 381 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 382 } else { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 388 data --; 389 #else 390 data = baij->colmap[idxn[j]/bs]-1; 391 #endif 392 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 393 else { 394 col = data + idxn[j]%bs; 395 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 396 } 397 } 398 } 399 } else { 400 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatNorm_MPISBAIJ" 408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 409 { 410 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscReal sum[2],*lnorm2; 413 414 PetscFunctionBegin; 415 if (baij->size == 1) { 416 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 417 } else { 418 if (type == NORM_FROBENIUS) { 419 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 420 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 421 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 422 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 423 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 424 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 425 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 426 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 427 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 428 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 429 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 430 PetscReal *rsum,*rsum2,vabs; 431 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 432 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 433 MatScalar *v; 434 435 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 436 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 437 /* Amat */ 438 v = amat->a; jj = amat->j; 439 for (brow=0; brow<mbs; brow++) { 440 grow = bs*(rstart + brow); 441 nz = amat->i[brow+1] - amat->i[brow]; 442 for (bcol=0; bcol<nz; bcol++){ 443 gcol = bs*(rstart + *jj); jj++; 444 for (col=0; col<bs; col++){ 445 for (row=0; row<bs; row++){ 446 vabs = PetscAbsScalar(*v); v++; 447 rsum[gcol+col] += vabs; 448 /* non-diagonal block */ 449 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 450 } 451 } 452 } 453 } 454 /* Bmat */ 455 v = bmat->a; jj = bmat->j; 456 for (brow=0; brow<mbs; brow++) { 457 grow = bs*(rstart + brow); 458 nz = bmat->i[brow+1] - bmat->i[brow]; 459 for (bcol=0; bcol<nz; bcol++){ 460 gcol = bs*garray[*jj]; jj++; 461 for (col=0; col<bs; col++){ 462 for (row=0; row<bs; row++){ 463 vabs = PetscAbsScalar(*v); v++; 464 rsum[gcol+col] += vabs; 465 rsum[grow+row] += vabs; 466 } 467 } 468 } 469 } 470 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 471 *norm = 0.0; 472 for (col=0; col<mat->cmap->N; col++) { 473 if (rsum2[col] > *norm) *norm = rsum2[col]; 474 } 475 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 476 } else { 477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 486 { 487 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 488 PetscErrorCode ierr; 489 PetscInt nstash,reallocs; 490 InsertMode addv; 491 492 PetscFunctionBegin; 493 if (baij->donotstash || mat->nooffprocentries) { 494 PetscFunctionReturn(0); 495 } 496 497 /* make sure all processors are either in INSERTMODE or ADDMODE */ 498 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 499 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 500 mat->insertmode = addv; /* in case this processor had no cache */ 501 502 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 503 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 504 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 505 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 506 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 507 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 514 { 515 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 516 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 517 PetscErrorCode ierr; 518 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 519 PetscInt *row,*col; 520 PetscBool other_disassembled; 521 PetscMPIInt n; 522 PetscBool r1,r2,r3; 523 MatScalar *val; 524 InsertMode addv = mat->insertmode; 525 526 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 527 PetscFunctionBegin; 528 529 if (!baij->donotstash && !mat->nooffprocentries) { 530 while (1) { 531 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 532 if (!flg) break; 533 534 for (i=0; i<n;) { 535 /* Now identify the consecutive vals belonging to the same row */ 536 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 537 if (j < n) ncols = j-i; 538 else ncols = n-i; 539 /* Now assemble all these values with a single function call */ 540 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 541 i = j; 542 } 543 } 544 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 545 /* Now process the block-stash. Since the values are stashed column-oriented, 546 set the roworiented flag to column oriented, and after MatSetValues() 547 restore the original flags */ 548 r1 = baij->roworiented; 549 r2 = a->roworiented; 550 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 551 baij->roworiented = PETSC_FALSE; 552 a->roworiented = PETSC_FALSE; 553 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 554 while (1) { 555 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 556 if (!flg) break; 557 558 for (i=0; i<n;) { 559 /* Now identify the consecutive vals belonging to the same row */ 560 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 561 if (j < n) ncols = j-i; 562 else ncols = n-i; 563 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 564 i = j; 565 } 566 } 567 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 568 baij->roworiented = r1; 569 a->roworiented = r2; 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 595 596 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 597 baij->rowvalues = 0; 598 599 PetscFunctionReturn(0); 600 } 601 602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 606 { 607 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 608 PetscErrorCode ierr; 609 PetscInt bs = mat->rmap->bs; 610 PetscMPIInt size = baij->size,rank = baij->rank; 611 PetscBool iascii,isdraw; 612 PetscViewer sviewer; 613 PetscViewerFormat format; 614 615 PetscFunctionBegin; 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 617 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 618 if (iascii) { 619 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 621 MatInfo info; 622 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 623 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscBool isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 671 672 /* copy over the A part */ 673 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 675 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 676 677 for (i=0; i<mbs; i++) { 678 rvals[0] = bs*(baij->rstartbs + i); 679 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 680 for (j=ai[i]; j<ai[i+1]; j++) { 681 col = (baij->cstartbs+aj[j])*bs; 682 for (k=0; k<bs; k++) { 683 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 684 col++; a += bs; 685 } 686 } 687 } 688 /* copy over the B part */ 689 Bloc = (Mat_SeqBAIJ*)baij->B->data; 690 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 691 for (i=0; i<mbs; i++) { 692 693 rvals[0] = bs*(baij->rstartbs + i); 694 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 695 for (j=ai[i]; j<ai[i+1]; j++) { 696 col = baij->garray[aj[j]]*bs; 697 for (k=0; k<bs; k++) { 698 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 699 col++; a += bs; 700 } 701 } 702 } 703 ierr = PetscFree(rvals);CHKERRQ(ierr); 704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 /* 707 Everyone has to call to draw the matrix since the graphics waits are 708 synchronized across all processors that share the PetscDraw object 709 */ 710 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 713 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 714 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 715 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 716 } 717 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 718 ierr = MatDestroy(&A);CHKERRQ(ierr); 719 } 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatView_MPISBAIJ" 725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 726 { 727 PetscErrorCode ierr; 728 PetscBool iascii,isdraw,issocket,isbinary; 729 730 PetscFunctionBegin; 731 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 735 if (iascii || isdraw || issocket || isbinary) { 736 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 737 } else { 738 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatDestroy_MPISBAIJ" 745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 746 { 747 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 #if defined(PETSC_USE_LOG) 752 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 753 #endif 754 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 755 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 756 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 757 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 758 #if defined (PETSC_USE_CTABLE) 759 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 760 #else 761 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 762 #endif 763 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 764 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 765 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 766 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 767 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 768 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 770 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 771 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 772 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 773 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 774 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 776 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 777 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 778 #if defined(PETSC_USE_REAL_MAT_SINGLE) 779 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 780 #endif 781 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 782 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 784 ierr = PetscFree(mat->data);CHKERRQ(ierr); 785 786 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 798 { 799 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 802 PetscScalar *x,*from; 803 804 PetscFunctionBegin; 805 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 806 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 807 808 /* diagonal part */ 809 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 810 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 811 812 /* subdiagonal part */ 813 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 814 815 /* copy x into the vec slvec0 */ 816 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 819 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 820 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 823 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 /* supperdiagonal part */ 826 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMult_MPISBAIJ" 832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 833 { 834 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 835 PetscErrorCode ierr; 836 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 837 PetscScalar *x,*from; 838 839 PetscFunctionBegin; 840 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 841 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 842 843 /* diagonal part */ 844 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 845 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 846 847 /* subdiagonal part */ 848 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 849 850 /* copy x into the vec slvec0 */ 851 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 854 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* supperdiagonal part */ 861 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 868 { 869 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscInt nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 876 877 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 878 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 879 880 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 /* do diagonal part */ 882 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 883 /* do supperdiagonal part */ 884 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 886 /* do subdiagonal part */ 887 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 888 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 889 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 899 PetscErrorCode ierr; 900 PetscInt mbs=a->mbs,bs=A->rmap->bs; 901 PetscScalar *x,*from,zero=0.0; 902 903 PetscFunctionBegin; 904 /* 905 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 906 PetscSynchronizedFlush(((PetscObject)A)->comm); 907 */ 908 /* diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 910 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 911 912 /* subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 914 915 /* copy x into the vec slvec0 */ 916 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 919 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 920 921 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 925 /* supperdiagonal part */ 926 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 927 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 /* do diagonal part */ 941 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 942 /* do supperdiagonal part */ 943 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 945 946 /* do subdiagonal part */ 947 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 948 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 949 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This only works correctly for square matrices where the subblock A->A is the 956 diagonal block 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 967 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatScale_MPISBAIJ" 973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 974 { 975 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 980 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatGetRow_MPISBAIJ" 986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 990 PetscErrorCode ierr; 991 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 992 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 993 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 PetscInt max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1011 } 1012 1013 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1014 lrow = row - brstart; /* local row index */ 1015 1016 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1017 if (!v) {pvA = 0; pvB = 0;} 1018 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1019 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1020 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1021 nztot = nzA + nzB; 1022 1023 cmap = mat->garray; 1024 if (v || idx) { 1025 if (nztot) { 1026 /* Sort by increasing column numbers, assuming A and B already sorted */ 1027 PetscInt imark = -1; 1028 if (v) { 1029 *v = v_p = mat->rowvalues; 1030 for (i=0; i<nzB; i++) { 1031 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1032 else break; 1033 } 1034 imark = i; 1035 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1036 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1037 } 1038 if (idx) { 1039 *idx = idx_p = mat->rowindices; 1040 if (imark > -1) { 1041 for (i=0; i<imark; i++) { 1042 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1043 } 1044 } else { 1045 for (i=0; i<nzB; i++) { 1046 if (cmap[cworkB[i]/bs] < cstart) 1047 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1048 else break; 1049 } 1050 imark = i; 1051 } 1052 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1053 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1054 } 1055 } else { 1056 if (idx) *idx = 0; 1057 if (v) *v = 0; 1058 } 1059 } 1060 *nz = nztot; 1061 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1069 { 1070 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1071 1072 PetscFunctionBegin; 1073 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1074 baij->getrowactive = PETSC_FALSE; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1084 1085 PetscFunctionBegin; 1086 aA->getrow_utriangular = PETSC_TRUE; 1087 PetscFunctionReturn(0); 1088 } 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1095 1096 PetscFunctionBegin; 1097 aA->getrow_utriangular = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1110 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1123 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1130 { 1131 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1136 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1143 { 1144 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1145 Mat A = a->A,B = a->B; 1146 PetscErrorCode ierr; 1147 PetscReal isend[5],irecv[5]; 1148 1149 PetscFunctionBegin; 1150 info->block_size = (PetscReal)matin->rmap->bs; 1151 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1152 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1153 isend[3] = info->memory; isend[4] = info->mallocs; 1154 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1155 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1156 isend[3] += info->memory; isend[4] += info->mallocs; 1157 if (flag == MAT_LOCAL) { 1158 info->nz_used = isend[0]; 1159 info->nz_allocated = isend[1]; 1160 info->nz_unneeded = isend[2]; 1161 info->memory = isend[3]; 1162 info->mallocs = isend[4]; 1163 } else if (flag == MAT_GLOBAL_MAX) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else if (flag == MAT_GLOBAL_SUM) { 1171 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1172 info->nz_used = irecv[0]; 1173 info->nz_allocated = irecv[1]; 1174 info->nz_unneeded = irecv[2]; 1175 info->memory = irecv[3]; 1176 info->mallocs = irecv[4]; 1177 } else { 1178 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1179 } 1180 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1181 info->fill_ratio_needed = 0; 1182 info->factor_mallocs = 0; 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1189 { 1190 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1191 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 switch (op) { 1196 case MAT_NEW_NONZERO_LOCATIONS: 1197 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1198 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1199 case MAT_KEEP_NONZERO_PATTERN: 1200 case MAT_NEW_NONZERO_LOCATION_ERR: 1201 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1203 break; 1204 case MAT_ROW_ORIENTED: 1205 a->roworiented = flg; 1206 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1207 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1208 break; 1209 case MAT_NEW_DIAGONALS: 1210 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1211 break; 1212 case MAT_IGNORE_OFF_PROC_ENTRIES: 1213 a->donotstash = flg; 1214 break; 1215 case MAT_USE_HASH_TABLE: 1216 a->ht_flag = flg; 1217 break; 1218 case MAT_HERMITIAN: 1219 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1220 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1221 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1222 break; 1223 case MAT_SPD: 1224 A->spd_set = PETSC_TRUE; 1225 A->spd = flg; 1226 if (flg) { 1227 A->symmetric = PETSC_TRUE; 1228 A->structurally_symmetric = PETSC_TRUE; 1229 A->symmetric_set = PETSC_TRUE; 1230 A->structurally_symmetric_set = PETSC_TRUE; 1231 } 1232 break; 1233 case MAT_SYMMETRIC: 1234 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1235 break; 1236 case MAT_STRUCTURALLY_SYMMETRIC: 1237 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1238 break; 1239 case MAT_SYMMETRY_ETERNAL: 1240 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1241 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1242 break; 1243 case MAT_IGNORE_LOWER_TRIANGULAR: 1244 aA->ignore_ltriangular = flg; 1245 break; 1246 case MAT_ERROR_LOWER_TRIANGULAR: 1247 aA->ignore_ltriangular = flg; 1248 break; 1249 case MAT_GETROW_UPPERTRIANGULAR: 1250 aA->getrow_utriangular = flg; 1251 break; 1252 default: 1253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1260 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1261 { 1262 PetscErrorCode ierr; 1263 PetscFunctionBegin; 1264 if (MAT_INITIAL_MATRIX || *B != A) { 1265 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1272 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1273 { 1274 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1275 Mat a=baij->A, b=baij->B; 1276 PetscErrorCode ierr; 1277 PetscInt nv,m,n; 1278 PetscBool flg; 1279 1280 PetscFunctionBegin; 1281 if (ll != rr){ 1282 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1283 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1284 } 1285 if (!ll) PetscFunctionReturn(0); 1286 1287 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1288 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1289 1290 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1291 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1292 1293 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 1295 /* left diagonalscale the off-diagonal part */ 1296 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1297 1298 /* scale the diagonal part */ 1299 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1300 1301 /* right diagonalscale the off-diagonal part */ 1302 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1309 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1310 { 1311 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatEqual_MPISBAIJ" 1323 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1324 { 1325 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1326 Mat a,b,c,d; 1327 PetscBool flg; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 a = matA->A; b = matA->B; 1332 c = matB->A; d = matB->B; 1333 1334 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1335 if (flg) { 1336 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1337 } 1338 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatCopy_MPISBAIJ" 1344 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1345 { 1346 PetscErrorCode ierr; 1347 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1348 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1349 1350 PetscFunctionBegin; 1351 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1352 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1353 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1354 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1355 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1356 } else { 1357 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1358 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1359 } 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1365 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1366 { 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1377 { 1378 PetscErrorCode ierr; 1379 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1380 PetscBLASInt bnz,one=1; 1381 Mat_SeqSBAIJ *xa,*ya; 1382 Mat_SeqBAIJ *xb,*yb; 1383 1384 PetscFunctionBegin; 1385 if (str == SAME_NONZERO_PATTERN) { 1386 PetscScalar alpha = a; 1387 xa = (Mat_SeqSBAIJ *)xx->A->data; 1388 ya = (Mat_SeqSBAIJ *)yy->A->data; 1389 bnz = PetscBLASIntCast(xa->nz); 1390 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1391 xb = (Mat_SeqBAIJ *)xx->B->data; 1392 yb = (Mat_SeqBAIJ *)yy->B->data; 1393 bnz = PetscBLASIntCast(xb->nz); 1394 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1395 } else { 1396 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1397 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1398 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1406 { 1407 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1408 PetscInt rbs,cbs; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1413 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1414 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1416 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1417 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1424 { 1425 PetscErrorCode ierr; 1426 PetscInt i; 1427 PetscBool flg; 1428 1429 PetscFunctionBegin; 1430 for (i=0; i<n; i++) { 1431 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1432 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1433 } 1434 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 1439 /* -------------------------------------------------------------------*/ 1440 static struct _MatOps MatOps_Values = { 1441 MatSetValues_MPISBAIJ, 1442 MatGetRow_MPISBAIJ, 1443 MatRestoreRow_MPISBAIJ, 1444 MatMult_MPISBAIJ, 1445 /* 4*/ MatMultAdd_MPISBAIJ, 1446 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1447 MatMultAdd_MPISBAIJ, 1448 0, 1449 0, 1450 0, 1451 /*10*/ 0, 1452 0, 1453 0, 1454 MatSOR_MPISBAIJ, 1455 MatTranspose_MPISBAIJ, 1456 /*15*/ MatGetInfo_MPISBAIJ, 1457 MatEqual_MPISBAIJ, 1458 MatGetDiagonal_MPISBAIJ, 1459 MatDiagonalScale_MPISBAIJ, 1460 MatNorm_MPISBAIJ, 1461 /*20*/ MatAssemblyBegin_MPISBAIJ, 1462 MatAssemblyEnd_MPISBAIJ, 1463 MatSetOption_MPISBAIJ, 1464 MatZeroEntries_MPISBAIJ, 1465 /*24*/ 0, 1466 0, 1467 0, 1468 0, 1469 0, 1470 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1471 0, 1472 0, 1473 0, 1474 0, 1475 /*34*/ MatDuplicate_MPISBAIJ, 1476 0, 1477 0, 1478 0, 1479 0, 1480 /*39*/ MatAXPY_MPISBAIJ, 1481 MatGetSubMatrices_MPISBAIJ, 1482 MatIncreaseOverlap_MPISBAIJ, 1483 MatGetValues_MPISBAIJ, 1484 MatCopy_MPISBAIJ, 1485 /*44*/ 0, 1486 MatScale_MPISBAIJ, 1487 0, 1488 0, 1489 0, 1490 /*49*/ MatSetBlockSize_MPISBAIJ, 1491 0, 1492 0, 1493 0, 1494 0, 1495 /*54*/ 0, 1496 0, 1497 MatSetUnfactored_MPISBAIJ, 1498 0, 1499 MatSetValuesBlocked_MPISBAIJ, 1500 /*59*/ 0, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*64*/ 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*74*/ 0, 1516 0, 1517 0, 1518 0, 1519 0, 1520 /*79*/ 0, 1521 0, 1522 0, 1523 0, 1524 MatLoad_MPISBAIJ, 1525 /*84*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*89*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*94*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*99*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*104*/0, 1546 MatRealPart_MPISBAIJ, 1547 MatImaginaryPart_MPISBAIJ, 1548 MatGetRowUpperTriangular_MPISBAIJ, 1549 MatRestoreRowUpperTriangular_MPISBAIJ, 1550 /*109*/0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*114*/0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*119*/0, 1561 0, 1562 0, 1563 0 1564 }; 1565 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1570 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1571 { 1572 PetscFunctionBegin; 1573 *a = ((Mat_MPISBAIJ *)A->data)->A; 1574 PetscFunctionReturn(0); 1575 } 1576 EXTERN_C_END 1577 1578 EXTERN_C_BEGIN 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1581 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1582 { 1583 Mat_MPISBAIJ *b; 1584 PetscErrorCode ierr; 1585 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1586 1587 PetscFunctionBegin; 1588 if (bs < 0){ 1589 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1590 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1591 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1592 bs = PetscAbs(bs); 1593 } 1594 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1595 bs = newbs; 1596 1597 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1598 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1599 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1600 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1601 1602 B->rmap->bs = B->cmap->bs = bs; 1603 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1604 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1605 1606 if (d_nnz) { 1607 for (i=0; i<B->rmap->n/bs; i++) { 1608 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1609 } 1610 } 1611 if (o_nnz) { 1612 for (i=0; i<B->rmap->n/bs; i++) { 1613 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1614 } 1615 } 1616 1617 b = (Mat_MPISBAIJ*)B->data; 1618 mbs = B->rmap->n/bs; 1619 Mbs = B->rmap->N/bs; 1620 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1621 1622 B->rmap->bs = bs; 1623 b->bs2 = bs*bs; 1624 b->mbs = mbs; 1625 b->nbs = mbs; 1626 b->Mbs = Mbs; 1627 b->Nbs = Mbs; 1628 1629 for (i=0; i<=b->size; i++) { 1630 b->rangebs[i] = B->rmap->range[i]/bs; 1631 } 1632 b->rstartbs = B->rmap->rstart/bs; 1633 b->rendbs = B->rmap->rend/bs; 1634 1635 b->cstartbs = B->cmap->rstart/bs; 1636 b->cendbs = B->cmap->rend/bs; 1637 1638 if (!B->preallocated) { 1639 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1640 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1641 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1642 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1643 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1644 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1645 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1646 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1647 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1648 } 1649 1650 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1651 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1652 B->preallocated = PETSC_TRUE; 1653 PetscFunctionReturn(0); 1654 } 1655 EXTERN_C_END 1656 1657 EXTERN_C_BEGIN 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1660 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1661 { 1662 PetscInt m,rstart,cstart,cend; 1663 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1664 const PetscInt *JJ=0; 1665 PetscScalar *values=0; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 1670 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1671 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1672 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1673 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1674 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1675 m = B->rmap->n/bs; 1676 rstart = B->rmap->rstart/bs; 1677 cstart = B->cmap->rstart/bs; 1678 cend = B->cmap->rend/bs; 1679 1680 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1681 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1682 for (i=0; i<m; i++) { 1683 nz = ii[i+1] - ii[i]; 1684 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1685 nz_max = PetscMax(nz_max,nz); 1686 JJ = jj + ii[i]; 1687 for (j=0; j<nz; j++) { 1688 if (*JJ >= cstart) break; 1689 JJ++; 1690 } 1691 d = 0; 1692 for (; j<nz; j++) { 1693 if (*JJ++ >= cend) break; 1694 d++; 1695 } 1696 d_nnz[i] = d; 1697 o_nnz[i] = nz - d; 1698 } 1699 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1700 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1701 1702 values = (PetscScalar*)V; 1703 if (!values) { 1704 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1705 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1706 } 1707 for (i=0; i<m; i++) { 1708 PetscInt row = i + rstart; 1709 PetscInt ncols = ii[i+1] - ii[i]; 1710 const PetscInt *icols = jj + ii[i]; 1711 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1712 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 1715 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1716 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1717 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1718 1719 PetscFunctionReturn(0); 1720 } 1721 EXTERN_C_END 1722 1723 EXTERN_C_BEGIN 1724 #if defined(PETSC_HAVE_MUMPS) 1725 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1726 #endif 1727 #if defined(PETSC_HAVE_SPOOLES) 1728 extern PetscErrorCode MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1729 #endif 1730 #if defined(PETSC_HAVE_PASTIX) 1731 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1732 #endif 1733 EXTERN_C_END 1734 1735 /*MC 1736 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1737 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1738 the matrix is stored. 1739 1740 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1741 can call MatSetOption(Mat, MAT_HERMITIAN); 1742 1743 Options Database Keys: 1744 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1745 1746 Level: beginner 1747 1748 .seealso: MatCreateMPISBAIJ 1749 M*/ 1750 1751 EXTERN_C_BEGIN 1752 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*); 1753 EXTERN_C_END 1754 1755 EXTERN_C_BEGIN 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatCreate_MPISBAIJ" 1758 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1759 { 1760 Mat_MPISBAIJ *b; 1761 PetscErrorCode ierr; 1762 PetscBool flg; 1763 1764 PetscFunctionBegin; 1765 1766 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1767 B->data = (void*)b; 1768 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1769 1770 B->ops->destroy = MatDestroy_MPISBAIJ; 1771 B->ops->view = MatView_MPISBAIJ; 1772 B->assembled = PETSC_FALSE; 1773 1774 B->insertmode = NOT_SET_VALUES; 1775 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1776 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1777 1778 /* build local table of row and column ownerships */ 1779 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1780 1781 /* build cache for off array entries formed */ 1782 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1783 b->donotstash = PETSC_FALSE; 1784 b->colmap = PETSC_NULL; 1785 b->garray = PETSC_NULL; 1786 b->roworiented = PETSC_TRUE; 1787 1788 /* stuff used in block assembly */ 1789 b->barray = 0; 1790 1791 /* stuff used for matrix vector multiply */ 1792 b->lvec = 0; 1793 b->Mvctx = 0; 1794 b->slvec0 = 0; 1795 b->slvec0b = 0; 1796 b->slvec1 = 0; 1797 b->slvec1a = 0; 1798 b->slvec1b = 0; 1799 b->sMvctx = 0; 1800 1801 /* stuff for MatGetRow() */ 1802 b->rowindices = 0; 1803 b->rowvalues = 0; 1804 b->getrowactive = PETSC_FALSE; 1805 1806 /* hash table stuff */ 1807 b->ht = 0; 1808 b->hd = 0; 1809 b->ht_size = 0; 1810 b->ht_flag = PETSC_FALSE; 1811 b->ht_fact = 0; 1812 b->ht_total_ct = 0; 1813 b->ht_insert_ct = 0; 1814 1815 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1816 b->ijonly = PETSC_FALSE; 1817 1818 b->in_loc = 0; 1819 b->v_loc = 0; 1820 b->n_loc = 0; 1821 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1822 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1823 if (flg) { 1824 PetscReal fact = 1.39; 1825 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1826 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1827 if (fact <= 1.0) fact = 1.39; 1828 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1829 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1830 } 1831 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1832 1833 #if defined(PETSC_HAVE_PASTIX) 1834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1835 "MatGetFactor_mpisbaij_pastix", 1836 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1837 #endif 1838 #if defined(PETSC_HAVE_MUMPS) 1839 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1840 "MatGetFactor_sbaij_mumps", 1841 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1842 #endif 1843 #if defined(PETSC_HAVE_SPOOLES) 1844 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1845 "MatGetFactor_mpisbaij_spooles", 1846 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1847 #endif 1848 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1849 "MatStoreValues_MPISBAIJ", 1850 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1851 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1852 "MatRetrieveValues_MPISBAIJ", 1853 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1854 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1855 "MatGetDiagonalBlock_MPISBAIJ", 1856 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1858 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1859 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1861 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1862 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C", 1864 "MatConvert_MPISBAIJ_MPISBSTRM", 1865 MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1866 1867 B->symmetric = PETSC_TRUE; 1868 B->structurally_symmetric = PETSC_TRUE; 1869 B->symmetric_set = PETSC_TRUE; 1870 B->structurally_symmetric_set = PETSC_TRUE; 1871 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 EXTERN_C_END 1875 1876 /*MC 1877 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1878 1879 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1880 and MATMPISBAIJ otherwise. 1881 1882 Options Database Keys: 1883 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1884 1885 Level: beginner 1886 1887 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1888 M*/ 1889 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1892 /*@C 1893 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1894 the user should preallocate the matrix storage by setting the parameters 1895 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1896 performance can be increased by more than a factor of 50. 1897 1898 Collective on Mat 1899 1900 Input Parameters: 1901 + A - the matrix 1902 . bs - size of blockk 1903 . d_nz - number of block nonzeros per block row in diagonal portion of local 1904 submatrix (same for all local rows) 1905 . d_nnz - array containing the number of block nonzeros in the various block rows 1906 in the upper triangular and diagonal part of the in diagonal portion of the local 1907 (possibly different for each block row) or PETSC_NULL. You must leave room 1908 for the diagonal entry even if it is zero. 1909 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1910 submatrix (same for all local rows). 1911 - o_nnz - array containing the number of nonzeros in the various block rows of the 1912 off-diagonal portion of the local submatrix that is right of the diagonal 1913 (possibly different for each block row) or PETSC_NULL. 1914 1915 1916 Options Database Keys: 1917 . -mat_no_unroll - uses code that does not unroll the loops in the 1918 block calculations (much slower) 1919 . -mat_block_size - size of the blocks to use 1920 1921 Notes: 1922 1923 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1924 than it must be used on all processors that share the object for that argument. 1925 1926 If the *_nnz parameter is given then the *_nz parameter is ignored 1927 1928 Storage Information: 1929 For a square global matrix we define each processor's diagonal portion 1930 to be its local rows and the corresponding columns (a square submatrix); 1931 each processor's off-diagonal portion encompasses the remainder of the 1932 local matrix (a rectangular submatrix). 1933 1934 The user can specify preallocated storage for the diagonal part of 1935 the local submatrix with either d_nz or d_nnz (not both). Set 1936 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1937 memory allocation. Likewise, specify preallocated storage for the 1938 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1939 1940 You can call MatGetInfo() to get information on how effective the preallocation was; 1941 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1942 You can also run with the option -info and look for messages with the string 1943 malloc in them to see if additional memory allocation was needed. 1944 1945 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1946 the figure below we depict these three local rows and all columns (0-11). 1947 1948 .vb 1949 0 1 2 3 4 5 6 7 8 9 10 11 1950 ------------------- 1951 row 3 | . . . d d d o o o o o o 1952 row 4 | . . . d d d o o o o o o 1953 row 5 | . . . d d d o o o o o o 1954 ------------------- 1955 .ve 1956 1957 Thus, any entries in the d locations are stored in the d (diagonal) 1958 submatrix, and any entries in the o locations are stored in the 1959 o (off-diagonal) submatrix. Note that the d matrix is stored in 1960 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1961 1962 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1963 plus the diagonal part of the d matrix, 1964 and o_nz should indicate the number of block nonzeros per row in the o matrix 1965 1966 In general, for PDE problems in which most nonzeros are near the diagonal, 1967 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1968 or you will get TERRIBLE performance; see the users' manual chapter on 1969 matrices. 1970 1971 Level: intermediate 1972 1973 .keywords: matrix, block, aij, compressed row, sparse, parallel 1974 1975 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1976 @*/ 1977 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1978 { 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1983 PetscValidType(B,1); 1984 PetscValidLogicalCollectiveInt(B,bs,2); 1985 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatCreateMPISBAIJ" 1991 /*@C 1992 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1993 (block compressed row). For good matrix assembly performance 1994 the user should preallocate the matrix storage by setting the parameters 1995 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1996 performance can be increased by more than a factor of 50. 1997 1998 Collective on MPI_Comm 1999 2000 Input Parameters: 2001 + comm - MPI communicator 2002 . bs - size of blockk 2003 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2004 This value should be the same as the local size used in creating the 2005 y vector for the matrix-vector product y = Ax. 2006 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2007 This value should be the same as the local size used in creating the 2008 x vector for the matrix-vector product y = Ax. 2009 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2010 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2011 . d_nz - number of block nonzeros per block row in diagonal portion of local 2012 submatrix (same for all local rows) 2013 . d_nnz - array containing the number of block nonzeros in the various block rows 2014 in the upper triangular portion of the in diagonal portion of the local 2015 (possibly different for each block block row) or PETSC_NULL. 2016 You must leave room for the diagonal entry even if it is zero. 2017 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2018 submatrix (same for all local rows). 2019 - o_nnz - array containing the number of nonzeros in the various block rows of the 2020 off-diagonal portion of the local submatrix (possibly different for 2021 each block row) or PETSC_NULL. 2022 2023 Output Parameter: 2024 . A - the matrix 2025 2026 Options Database Keys: 2027 . -mat_no_unroll - uses code that does not unroll the loops in the 2028 block calculations (much slower) 2029 . -mat_block_size - size of the blocks to use 2030 . -mat_mpi - use the parallel matrix data structures even on one processor 2031 (defaults to using SeqBAIJ format on one processor) 2032 2033 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2034 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2035 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2036 2037 Notes: 2038 The number of rows and columns must be divisible by blocksize. 2039 This matrix type does not support complex Hermitian operation. 2040 2041 The user MUST specify either the local or global matrix dimensions 2042 (possibly both). 2043 2044 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2045 than it must be used on all processors that share the object for that argument. 2046 2047 If the *_nnz parameter is given then the *_nz parameter is ignored 2048 2049 Storage Information: 2050 For a square global matrix we define each processor's diagonal portion 2051 to be its local rows and the corresponding columns (a square submatrix); 2052 each processor's off-diagonal portion encompasses the remainder of the 2053 local matrix (a rectangular submatrix). 2054 2055 The user can specify preallocated storage for the diagonal part of 2056 the local submatrix with either d_nz or d_nnz (not both). Set 2057 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2058 memory allocation. Likewise, specify preallocated storage for the 2059 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2060 2061 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2062 the figure below we depict these three local rows and all columns (0-11). 2063 2064 .vb 2065 0 1 2 3 4 5 6 7 8 9 10 11 2066 ------------------- 2067 row 3 | . . . d d d o o o o o o 2068 row 4 | . . . d d d o o o o o o 2069 row 5 | . . . d d d o o o o o o 2070 ------------------- 2071 .ve 2072 2073 Thus, any entries in the d locations are stored in the d (diagonal) 2074 submatrix, and any entries in the o locations are stored in the 2075 o (off-diagonal) submatrix. Note that the d matrix is stored in 2076 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2077 2078 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2079 plus the diagonal part of the d matrix, 2080 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2081 In general, for PDE problems in which most nonzeros are near the diagonal, 2082 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2083 or you will get TERRIBLE performance; see the users' manual chapter on 2084 matrices. 2085 2086 Level: intermediate 2087 2088 .keywords: matrix, block, aij, compressed row, sparse, parallel 2089 2090 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2091 @*/ 2092 2093 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2094 { 2095 PetscErrorCode ierr; 2096 PetscMPIInt size; 2097 2098 PetscFunctionBegin; 2099 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2100 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2101 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2102 if (size > 1) { 2103 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2104 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2105 } else { 2106 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2107 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2115 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2116 { 2117 Mat mat; 2118 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2119 PetscErrorCode ierr; 2120 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2121 PetscScalar *array; 2122 2123 PetscFunctionBegin; 2124 *newmat = 0; 2125 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2126 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2127 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2128 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2129 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2130 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2131 2132 mat->factortype = matin->factortype; 2133 mat->preallocated = PETSC_TRUE; 2134 mat->assembled = PETSC_TRUE; 2135 mat->insertmode = NOT_SET_VALUES; 2136 2137 a = (Mat_MPISBAIJ*)mat->data; 2138 a->bs2 = oldmat->bs2; 2139 a->mbs = oldmat->mbs; 2140 a->nbs = oldmat->nbs; 2141 a->Mbs = oldmat->Mbs; 2142 a->Nbs = oldmat->Nbs; 2143 2144 2145 a->size = oldmat->size; 2146 a->rank = oldmat->rank; 2147 a->donotstash = oldmat->donotstash; 2148 a->roworiented = oldmat->roworiented; 2149 a->rowindices = 0; 2150 a->rowvalues = 0; 2151 a->getrowactive = PETSC_FALSE; 2152 a->barray = 0; 2153 a->rstartbs = oldmat->rstartbs; 2154 a->rendbs = oldmat->rendbs; 2155 a->cstartbs = oldmat->cstartbs; 2156 a->cendbs = oldmat->cendbs; 2157 2158 /* hash table stuff */ 2159 a->ht = 0; 2160 a->hd = 0; 2161 a->ht_size = 0; 2162 a->ht_flag = oldmat->ht_flag; 2163 a->ht_fact = oldmat->ht_fact; 2164 a->ht_total_ct = 0; 2165 a->ht_insert_ct = 0; 2166 2167 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2168 if (oldmat->colmap) { 2169 #if defined (PETSC_USE_CTABLE) 2170 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2171 #else 2172 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2173 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2174 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2175 #endif 2176 } else a->colmap = 0; 2177 2178 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2179 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2180 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2181 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2182 } else a->garray = 0; 2183 2184 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2185 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2186 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2187 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2188 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2189 2190 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2191 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2192 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2193 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2194 2195 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2196 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2197 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2198 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2199 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2200 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2201 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2202 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2203 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2204 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2205 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2206 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2207 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2208 2209 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2210 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2211 a->sMvctx = oldmat->sMvctx; 2212 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2213 2214 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2215 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2216 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2217 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2218 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2219 *newmat = mat; 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatLoad_MPISBAIJ" 2225 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2226 { 2227 PetscErrorCode ierr; 2228 PetscInt i,nz,j,rstart,rend; 2229 PetscScalar *vals,*buf; 2230 MPI_Comm comm = ((PetscObject)viewer)->comm; 2231 MPI_Status status; 2232 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2233 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2234 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2235 PetscInt bs=1,Mbs,mbs,extra_rows; 2236 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2237 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2238 int fd; 2239 2240 PetscFunctionBegin; 2241 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2242 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2243 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2244 2245 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2246 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2247 if (!rank) { 2248 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2249 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2250 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2251 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2252 } 2253 2254 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2255 2256 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2257 M = header[1]; N = header[2]; 2258 2259 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2260 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2261 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2262 2263 /* If global sizes are set, check if they are consistent with that given in the file */ 2264 if (sizesset) { 2265 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2266 } 2267 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2268 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2269 2270 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2271 2272 /* 2273 This code adds extra rows to make sure the number of rows is 2274 divisible by the blocksize 2275 */ 2276 Mbs = M/bs; 2277 extra_rows = bs - M + bs*(Mbs); 2278 if (extra_rows == bs) extra_rows = 0; 2279 else Mbs++; 2280 if (extra_rows &&!rank) { 2281 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2282 } 2283 2284 /* determine ownership of all rows */ 2285 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2286 mbs = Mbs/size + ((Mbs % size) > rank); 2287 m = mbs*bs; 2288 } else { /* User Set */ 2289 m = newmat->rmap->n; 2290 mbs = m/bs; 2291 } 2292 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2293 mmbs = PetscMPIIntCast(mbs); 2294 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2295 rowners[0] = 0; 2296 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2297 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2298 rstart = rowners[rank]; 2299 rend = rowners[rank+1]; 2300 2301 /* distribute row lengths to all processors */ 2302 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2303 if (!rank) { 2304 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2305 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2306 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2307 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2308 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2309 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2310 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2311 } else { 2312 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2313 } 2314 2315 if (!rank) { /* procs[0] */ 2316 /* calculate the number of nonzeros on each processor */ 2317 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2318 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2319 for (i=0; i<size; i++) { 2320 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2321 procsnz[i] += rowlengths[j]; 2322 } 2323 } 2324 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2325 2326 /* determine max buffer needed and allocate it */ 2327 maxnz = 0; 2328 for (i=0; i<size; i++) { 2329 maxnz = PetscMax(maxnz,procsnz[i]); 2330 } 2331 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2332 2333 /* read in my part of the matrix column indices */ 2334 nz = procsnz[0]; 2335 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2336 mycols = ibuf; 2337 if (size == 1) nz -= extra_rows; 2338 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2339 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2340 2341 /* read in every ones (except the last) and ship off */ 2342 for (i=1; i<size-1; i++) { 2343 nz = procsnz[i]; 2344 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2345 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2346 } 2347 /* read in the stuff for the last proc */ 2348 if (size != 1) { 2349 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2350 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2351 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2352 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2353 } 2354 ierr = PetscFree(cols);CHKERRQ(ierr); 2355 } else { /* procs[i], i>0 */ 2356 /* determine buffer space needed for message */ 2357 nz = 0; 2358 for (i=0; i<m; i++) { 2359 nz += locrowlens[i]; 2360 } 2361 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2362 mycols = ibuf; 2363 /* receive message of column indices*/ 2364 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2365 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2366 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2367 } 2368 2369 /* loop over local rows, determining number of off diagonal entries */ 2370 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2371 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2372 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2373 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2374 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2375 rowcount = 0; 2376 nzcount = 0; 2377 for (i=0; i<mbs; i++) { 2378 dcount = 0; 2379 odcount = 0; 2380 for (j=0; j<bs; j++) { 2381 kmax = locrowlens[rowcount]; 2382 for (k=0; k<kmax; k++) { 2383 tmp = mycols[nzcount++]/bs; /* block col. index */ 2384 if (!mask[tmp]) { 2385 mask[tmp] = 1; 2386 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2387 else masked1[dcount++] = tmp; /* entry in diag portion */ 2388 } 2389 } 2390 rowcount++; 2391 } 2392 2393 dlens[i] = dcount; /* d_nzz[i] */ 2394 odlens[i] = odcount; /* o_nzz[i] */ 2395 2396 /* zero out the mask elements we set */ 2397 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2398 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2399 } 2400 if (!sizesset) { 2401 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2402 } 2403 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2404 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2405 2406 if (!rank) { 2407 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2408 /* read in my part of the matrix numerical values */ 2409 nz = procsnz[0]; 2410 vals = buf; 2411 mycols = ibuf; 2412 if (size == 1) nz -= extra_rows; 2413 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2414 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2415 2416 /* insert into matrix */ 2417 jj = rstart*bs; 2418 for (i=0; i<m; i++) { 2419 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 mycols += locrowlens[i]; 2421 vals += locrowlens[i]; 2422 jj++; 2423 } 2424 2425 /* read in other processors (except the last one) and ship out */ 2426 for (i=1; i<size-1; i++) { 2427 nz = procsnz[i]; 2428 vals = buf; 2429 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2430 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2431 } 2432 /* the last proc */ 2433 if (size != 1){ 2434 nz = procsnz[i] - extra_rows; 2435 vals = buf; 2436 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2437 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2438 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2439 } 2440 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2441 2442 } else { 2443 /* receive numeric values */ 2444 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2445 2446 /* receive message of values*/ 2447 vals = buf; 2448 mycols = ibuf; 2449 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2450 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2451 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2452 2453 /* insert into matrix */ 2454 jj = rstart*bs; 2455 for (i=0; i<m; i++) { 2456 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2457 mycols += locrowlens[i]; 2458 vals += locrowlens[i]; 2459 jj++; 2460 } 2461 } 2462 2463 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2464 ierr = PetscFree(buf);CHKERRQ(ierr); 2465 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2466 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2467 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2468 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2469 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2470 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2476 /*XXXXX@ 2477 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2478 2479 Input Parameters: 2480 . mat - the matrix 2481 . fact - factor 2482 2483 Not Collective on Mat, each process can have a different hash factor 2484 2485 Level: advanced 2486 2487 Notes: 2488 This can also be set by the command line option: -mat_use_hash_table fact 2489 2490 .keywords: matrix, hashtable, factor, HT 2491 2492 .seealso: MatSetOption() 2493 @XXXXX*/ 2494 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2498 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2499 { 2500 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2501 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2502 PetscReal atmp; 2503 PetscReal *work,*svalues,*rvalues; 2504 PetscErrorCode ierr; 2505 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2506 PetscMPIInt rank,size; 2507 PetscInt *rowners_bs,dest,count,source; 2508 PetscScalar *va; 2509 MatScalar *ba; 2510 MPI_Status stat; 2511 2512 PetscFunctionBegin; 2513 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2514 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2515 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2516 2517 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2518 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2519 2520 bs = A->rmap->bs; 2521 mbs = a->mbs; 2522 Mbs = a->Mbs; 2523 ba = b->a; 2524 bi = b->i; 2525 bj = b->j; 2526 2527 /* find ownerships */ 2528 rowners_bs = A->rmap->range; 2529 2530 /* each proc creates an array to be distributed */ 2531 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2532 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2533 2534 /* row_max for B */ 2535 if (rank != size-1){ 2536 for (i=0; i<mbs; i++) { 2537 ncols = bi[1] - bi[0]; bi++; 2538 brow = bs*i; 2539 for (j=0; j<ncols; j++){ 2540 bcol = bs*(*bj); 2541 for (kcol=0; kcol<bs; kcol++){ 2542 col = bcol + kcol; /* local col index */ 2543 col += rowners_bs[rank+1]; /* global col index */ 2544 for (krow=0; krow<bs; krow++){ 2545 atmp = PetscAbsScalar(*ba); ba++; 2546 row = brow + krow; /* local row index */ 2547 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2548 if (work[col] < atmp) work[col] = atmp; 2549 } 2550 } 2551 bj++; 2552 } 2553 } 2554 2555 /* send values to its owners */ 2556 for (dest=rank+1; dest<size; dest++){ 2557 svalues = work + rowners_bs[dest]; 2558 count = rowners_bs[dest+1]-rowners_bs[dest]; 2559 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2560 } 2561 } 2562 2563 /* receive values */ 2564 if (rank){ 2565 rvalues = work; 2566 count = rowners_bs[rank+1]-rowners_bs[rank]; 2567 for (source=0; source<rank; source++){ 2568 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2569 /* process values */ 2570 for (i=0; i<count; i++){ 2571 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2572 } 2573 } 2574 } 2575 2576 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2577 ierr = PetscFree(work);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 #undef __FUNCT__ 2582 #define __FUNCT__ "MatSOR_MPISBAIJ" 2583 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2584 { 2585 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2586 PetscErrorCode ierr; 2587 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2588 PetscScalar *x,*ptr,*from; 2589 Vec bb1; 2590 const PetscScalar *b; 2591 2592 PetscFunctionBegin; 2593 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2594 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2595 2596 if (flag == SOR_APPLY_UPPER) { 2597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2602 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2603 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2604 its--; 2605 } 2606 2607 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2608 while (its--){ 2609 2610 /* lower triangular part: slvec0b = - B^T*xx */ 2611 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2612 2613 /* copy xx into slvec0a */ 2614 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2615 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2616 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2617 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2618 2619 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2620 2621 /* copy bb into slvec1a */ 2622 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2623 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2624 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2625 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2626 2627 /* set slvec1b = 0 */ 2628 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2629 2630 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2631 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2632 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2633 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2634 2635 /* upper triangular part: bb1 = bb1 - B*x */ 2636 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2637 2638 /* local diagonal sweep */ 2639 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2640 } 2641 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2642 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2643 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2644 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2646 } else if (flag & SOR_EISENSTAT) { 2647 Vec xx1; 2648 PetscBool hasop; 2649 const PetscScalar *diag; 2650 PetscScalar *sl,scale = (omega - 2.0)/omega; 2651 PetscInt i,n; 2652 2653 if (!mat->xx1) { 2654 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2655 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2656 } 2657 xx1 = mat->xx1; 2658 bb1 = mat->bb1; 2659 2660 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2661 2662 if (!mat->diag) { 2663 /* this is wrong for same matrix with new nonzero values */ 2664 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2665 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2666 } 2667 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2668 2669 if (hasop) { 2670 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2671 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2672 } else { 2673 /* 2674 These two lines are replaced by code that may be a bit faster for a good compiler 2675 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2676 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2677 */ 2678 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2679 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2680 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2681 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2682 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2683 if (omega == 1.0) { 2684 for (i=0; i<n; i++) { 2685 sl[i] = b[i] - diag[i]*x[i]; 2686 } 2687 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2688 } else { 2689 for (i=0; i<n; i++) { 2690 sl[i] = b[i] + scale*diag[i]*x[i]; 2691 } 2692 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2693 } 2694 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2695 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2696 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2697 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2698 } 2699 2700 /* multiply off-diagonal portion of matrix */ 2701 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2702 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2703 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2704 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2705 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2706 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2707 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2708 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2709 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2710 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2711 2712 /* local sweep */ 2713 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2714 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2715 } else { 2716 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2717 } 2718 PetscFunctionReturn(0); 2719 } 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2723 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2724 { 2725 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2726 PetscErrorCode ierr; 2727 Vec lvec1,bb1; 2728 2729 PetscFunctionBegin; 2730 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2731 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2732 2733 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2734 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2735 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2736 its--; 2737 } 2738 2739 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2740 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2741 while (its--){ 2742 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2743 2744 /* lower diagonal part: bb1 = bb - B^T*xx */ 2745 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2746 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2747 2748 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2749 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2750 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2751 2752 /* upper diagonal part: bb1 = bb1 - B*x */ 2753 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2754 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2755 2756 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2757 2758 /* diagonal sweep */ 2759 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2760 } 2761 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2762 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2763 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2769 /*@ 2770 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2771 CSR format the local rows. 2772 2773 Collective on MPI_Comm 2774 2775 Input Parameters: 2776 + comm - MPI communicator 2777 . bs - the block size, only a block size of 1 is supported 2778 . m - number of local rows (Cannot be PETSC_DECIDE) 2779 . n - This value should be the same as the local size used in creating the 2780 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2781 calculated if N is given) For square matrices n is almost always m. 2782 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2783 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2784 . i - row indices 2785 . j - column indices 2786 - a - matrix values 2787 2788 Output Parameter: 2789 . mat - the matrix 2790 2791 Level: intermediate 2792 2793 Notes: 2794 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2795 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2796 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2797 2798 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2799 2800 .keywords: matrix, aij, compressed row, sparse, parallel 2801 2802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2803 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2804 @*/ 2805 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2806 { 2807 PetscErrorCode ierr; 2808 2809 2810 PetscFunctionBegin; 2811 if (i[0]) { 2812 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2813 } 2814 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2815 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2816 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2817 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2818 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2819 PetscFunctionReturn(0); 2820 } 2821 2822 2823 #undef __FUNCT__ 2824 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2825 /*@C 2826 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2827 (the default parallel PETSc format). 2828 2829 Collective on MPI_Comm 2830 2831 Input Parameters: 2832 + A - the matrix 2833 . bs - the block size 2834 . i - the indices into j for the start of each local row (starts with zero) 2835 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2836 - v - optional values in the matrix 2837 2838 Level: developer 2839 2840 .keywords: matrix, aij, compressed row, sparse, parallel 2841 2842 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2843 @*/ 2844 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2845 { 2846 PetscErrorCode ierr; 2847 2848 PetscFunctionBegin; 2849 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2850 PetscFunctionReturn(0); 2851 } 2852 2853 2854