1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash){ 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs){ 189 if (a->ignore_ltriangular){ 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 200 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 201 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 202 } else if (in[j] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 205 #endif 206 else { /* off-diag entry (B) */ 207 if (mat->was_assembled) { 208 if (!baij->colmap) { 209 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 210 } 211 #if defined (PETSC_USE_CTABLE) 212 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 213 col = col - 1; 214 #else 215 col = baij->colmap[in[j]/bs] - 1; 216 #endif 217 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 218 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 219 col = in[j]; 220 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 221 B = baij->B; 222 b = (Mat_SeqBAIJ*)(B)->data; 223 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 224 ba=b->a; 225 } else col += in[j]%bs; 226 } else col = in[j]; 227 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 n_loc = 0; 236 for (j=0; j<n; j++){ 237 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 238 in_loc[n_loc] = in[j]; 239 if (roworiented) { 240 v_loc[n_loc] = v[i*n+j]; 241 } else { 242 v_loc[n_loc] = v[j*m+i]; 243 } 244 n_loc++; 245 } 246 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 256 { 257 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 258 const MatScalar *value; 259 MatScalar *barray=baij->barray; 260 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 261 PetscErrorCode ierr; 262 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 263 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 264 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 265 266 PetscFunctionBegin; 267 if(!barray) { 268 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 269 baij->barray = barray; 270 } 271 272 if (roworiented) { 273 stepval = (n-1)*bs; 274 } else { 275 stepval = (m-1)*bs; 276 } 277 for (i=0; i<m; i++) { 278 if (im[i] < 0) continue; 279 #if defined(PETSC_USE_DEBUG) 280 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 281 #endif 282 if (im[i] >= rstart && im[i] < rend) { 283 row = im[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (im[i] > in[j]) { 286 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 287 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 288 } 289 /* If NumCol = 1 then a copy is not required */ 290 if ((roworiented) && (n == 1)) { 291 barray = (MatScalar*) v + i*bs2; 292 } else if((!roworiented) && (m == 1)) { 293 barray = (MatScalar*) v + j*bs2; 294 } else { /* Here a copy is required */ 295 if (roworiented) { 296 value = v + i*(stepval+bs)*bs + j*bs; 297 } else { 298 value = v + j*(stepval+bs)*bs + i*bs; 299 } 300 for (ii=0; ii<bs; ii++,value+=stepval) { 301 for (jj=0; jj<bs; jj++) { 302 *barray++ = *value++; 303 } 304 } 305 barray -=bs2; 306 } 307 308 if (in[j] >= cstart && in[j] < cend){ 309 col = in[j] - cstart; 310 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 311 } 312 else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined (PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } 343 else col = in[j]; 344 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 345 } 346 } 347 } else { 348 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 349 if (!baij->donotstash) { 350 if (roworiented) { 351 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 352 } else { 353 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 354 } 355 } 356 } 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatGetValues_MPISBAIJ" 363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 364 { 365 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 366 PetscErrorCode ierr; 367 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 368 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 369 370 PetscFunctionBegin; 371 for (i=0; i<m; i++) { 372 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 373 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 374 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 375 row = idxm[i] - bsrstart; 376 for (j=0; j<n; j++) { 377 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 378 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 379 if (idxn[j] >= bscstart && idxn[j] < bscend){ 380 col = idxn[j] - bscstart; 381 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 382 } else { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 388 data --; 389 #else 390 data = baij->colmap[idxn[j]/bs]-1; 391 #endif 392 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 393 else { 394 col = data + idxn[j]%bs; 395 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 396 } 397 } 398 } 399 } else { 400 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 401 } 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatNorm_MPISBAIJ" 408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 409 { 410 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscReal sum[2],*lnorm2; 413 414 PetscFunctionBegin; 415 if (baij->size == 1) { 416 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 417 } else { 418 if (type == NORM_FROBENIUS) { 419 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 420 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 421 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 422 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 423 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 424 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 425 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 426 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 427 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 428 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 429 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 430 PetscReal *rsum,*rsum2,vabs; 431 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 432 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 433 MatScalar *v; 434 435 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 436 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 437 /* Amat */ 438 v = amat->a; jj = amat->j; 439 for (brow=0; brow<mbs; brow++) { 440 grow = bs*(rstart + brow); 441 nz = amat->i[brow+1] - amat->i[brow]; 442 for (bcol=0; bcol<nz; bcol++){ 443 gcol = bs*(rstart + *jj); jj++; 444 for (col=0; col<bs; col++){ 445 for (row=0; row<bs; row++){ 446 vabs = PetscAbsScalar(*v); v++; 447 rsum[gcol+col] += vabs; 448 /* non-diagonal block */ 449 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 450 } 451 } 452 } 453 } 454 /* Bmat */ 455 v = bmat->a; jj = bmat->j; 456 for (brow=0; brow<mbs; brow++) { 457 grow = bs*(rstart + brow); 458 nz = bmat->i[brow+1] - bmat->i[brow]; 459 for (bcol=0; bcol<nz; bcol++){ 460 gcol = bs*garray[*jj]; jj++; 461 for (col=0; col<bs; col++){ 462 for (row=0; row<bs; row++){ 463 vabs = PetscAbsScalar(*v); v++; 464 rsum[gcol+col] += vabs; 465 rsum[grow+row] += vabs; 466 } 467 } 468 } 469 } 470 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 471 *norm = 0.0; 472 for (col=0; col<mat->cmap->N; col++) { 473 if (rsum2[col] > *norm) *norm = rsum2[col]; 474 } 475 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 476 } else { 477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 486 { 487 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 488 PetscErrorCode ierr; 489 PetscInt nstash,reallocs; 490 InsertMode addv; 491 492 PetscFunctionBegin; 493 if (baij->donotstash || mat->nooffprocentries) { 494 PetscFunctionReturn(0); 495 } 496 497 /* make sure all processors are either in INSERTMODE or ADDMODE */ 498 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 499 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 500 mat->insertmode = addv; /* in case this processor had no cache */ 501 502 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 503 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 504 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 505 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 506 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 507 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 514 { 515 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 516 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 517 PetscErrorCode ierr; 518 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 519 PetscInt *row,*col; 520 PetscBool other_disassembled; 521 PetscMPIInt n; 522 PetscBool r1,r2,r3; 523 MatScalar *val; 524 InsertMode addv = mat->insertmode; 525 526 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 527 PetscFunctionBegin; 528 529 if (!baij->donotstash && !mat->nooffprocentries) { 530 while (1) { 531 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 532 if (!flg) break; 533 534 for (i=0; i<n;) { 535 /* Now identify the consecutive vals belonging to the same row */ 536 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 537 if (j < n) ncols = j-i; 538 else ncols = n-i; 539 /* Now assemble all these values with a single function call */ 540 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 541 i = j; 542 } 543 } 544 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 545 /* Now process the block-stash. Since the values are stashed column-oriented, 546 set the roworiented flag to column oriented, and after MatSetValues() 547 restore the original flags */ 548 r1 = baij->roworiented; 549 r2 = a->roworiented; 550 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 551 baij->roworiented = PETSC_FALSE; 552 a->roworiented = PETSC_FALSE; 553 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 554 while (1) { 555 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 556 if (!flg) break; 557 558 for (i=0; i<n;) { 559 /* Now identify the consecutive vals belonging to the same row */ 560 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 561 if (j < n) ncols = j-i; 562 else ncols = n-i; 563 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 564 i = j; 565 } 566 } 567 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 568 baij->roworiented = r1; 569 a->roworiented = r2; 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr); 593 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 594 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 595 596 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 597 baij->rowvalues = 0; 598 599 PetscFunctionReturn(0); 600 } 601 602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 606 { 607 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 608 PetscErrorCode ierr; 609 PetscInt bs = mat->rmap->bs; 610 PetscMPIInt size = baij->size,rank = baij->rank; 611 PetscBool iascii,isdraw; 612 PetscViewer sviewer; 613 PetscViewerFormat format; 614 615 PetscFunctionBegin; 616 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 617 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 618 if (iascii) { 619 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 621 MatInfo info; 622 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 623 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 626 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 628 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 630 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 633 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 639 PetscFunctionReturn(0); 640 } 641 } 642 643 if (isdraw) { 644 PetscDraw draw; 645 PetscBool isnull; 646 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 647 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 648 } 649 650 if (size == 1) { 651 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 652 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 653 } else { 654 /* assemble the entire matrix onto first processor. */ 655 Mat A; 656 Mat_SeqSBAIJ *Aloc; 657 Mat_SeqBAIJ *Bloc; 658 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 659 MatScalar *a; 660 661 /* Should this be the same type as mat? */ 662 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 665 } else { 666 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 667 } 668 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 669 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 670 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 671 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 672 673 /* copy over the A part */ 674 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 675 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 676 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 677 678 for (i=0; i<mbs; i++) { 679 rvals[0] = bs*(baij->rstartbs + i); 680 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 681 for (j=ai[i]; j<ai[i+1]; j++) { 682 col = (baij->cstartbs+aj[j])*bs; 683 for (k=0; k<bs; k++) { 684 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 685 col++; a += bs; 686 } 687 } 688 } 689 /* copy over the B part */ 690 Bloc = (Mat_SeqBAIJ*)baij->B->data; 691 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 692 for (i=0; i<mbs; i++) { 693 694 rvals[0] = bs*(baij->rstartbs + i); 695 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 696 for (j=ai[i]; j<ai[i+1]; j++) { 697 col = baij->garray[aj[j]]*bs; 698 for (k=0; k<bs; k++) { 699 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 700 col++; a += bs; 701 } 702 } 703 } 704 ierr = PetscFree(rvals);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 707 /* 708 Everyone has to call to draw the matrix since the graphics waits are 709 synchronized across all processors that share the PetscDraw object 710 */ 711 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 712 if (!rank) { 713 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 714 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 715 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 716 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 717 } 718 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 719 ierr = MatDestroy(&A);CHKERRQ(ierr); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatView_MPISBAIJ" 726 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 727 { 728 PetscErrorCode ierr; 729 PetscBool iascii,isdraw,issocket,isbinary; 730 731 PetscFunctionBegin; 732 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 733 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 734 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 735 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 736 if (iascii || isdraw || issocket || isbinary) { 737 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 738 } else { 739 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatDestroy_MPISBAIJ" 746 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 747 { 748 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 #if defined(PETSC_USE_LOG) 753 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 754 #endif 755 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 756 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 757 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 758 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 759 #if defined (PETSC_USE_CTABLE) 760 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 761 #else 762 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 763 #endif 764 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 765 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 766 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 767 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 768 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 770 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 771 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 772 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 773 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 774 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 775 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 776 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 777 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 778 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 779 #if defined(PETSC_USE_REAL_MAT_SINGLE) 780 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 781 #endif 782 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 783 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 784 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 785 ierr = PetscFree(mat->data);CHKERRQ(ierr); 786 787 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 790 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 798 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 799 { 800 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 801 PetscErrorCode ierr; 802 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 803 PetscScalar *x,*from; 804 805 PetscFunctionBegin; 806 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 807 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 808 809 /* diagonal part */ 810 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 811 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 812 813 /* subdiagonal part */ 814 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 815 816 /* copy x into the vec slvec0 */ 817 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 820 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 821 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 822 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 823 824 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 /* supperdiagonal part */ 827 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatMult_MPISBAIJ" 833 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 834 { 835 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 836 PetscErrorCode ierr; 837 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 838 PetscScalar *x,*from; 839 840 PetscFunctionBegin; 841 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 842 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 843 844 /* diagonal part */ 845 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 846 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 847 848 /* subdiagonal part */ 849 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 850 851 /* copy x into the vec slvec0 */ 852 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 853 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 854 855 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 856 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 857 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858 859 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 /* supperdiagonal part */ 862 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 868 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 869 { 870 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 871 PetscErrorCode ierr; 872 PetscInt nt; 873 874 PetscFunctionBegin; 875 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 876 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 877 878 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 879 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 880 881 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 /* do diagonal part */ 883 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 884 /* do supperdiagonal part */ 885 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 887 /* do subdiagonal part */ 888 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 889 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 890 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 891 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 897 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 898 { 899 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 900 PetscErrorCode ierr; 901 PetscInt mbs=a->mbs,bs=A->rmap->bs; 902 PetscScalar *x,*from,zero=0.0; 903 904 PetscFunctionBegin; 905 /* 906 PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n"); 907 PetscSynchronizedFlush(((PetscObject)A)->comm); 908 */ 909 /* diagonal part */ 910 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 911 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 912 913 /* subdiagonal part */ 914 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 915 916 /* copy x into the vec slvec0 */ 917 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 920 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 921 922 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 924 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925 926 /* supperdiagonal part */ 927 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 928 929 PetscFunctionReturn(0); 930 } 931 932 #undef __FUNCT__ 933 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 934 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 935 { 936 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 937 PetscErrorCode ierr; 938 939 PetscFunctionBegin; 940 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 /* do diagonal part */ 942 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 943 /* do supperdiagonal part */ 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 946 947 /* do subdiagonal part */ 948 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 949 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 950 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 951 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 This only works correctly for square matrices where the subblock A->A is the 957 diagonal block 958 */ 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 961 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 962 { 963 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 964 PetscErrorCode ierr; 965 966 PetscFunctionBegin; 967 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 968 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatScale_MPISBAIJ" 974 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 975 { 976 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 977 PetscErrorCode ierr; 978 979 PetscFunctionBegin; 980 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 981 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatGetRow_MPISBAIJ" 987 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 988 { 989 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 990 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 991 PetscErrorCode ierr; 992 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 993 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 994 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 995 996 PetscFunctionBegin; 997 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 998 mat->getrowactive = PETSC_TRUE; 999 1000 if (!mat->rowvalues && (idx || v)) { 1001 /* 1002 allocate enough space to hold information from the longest row. 1003 */ 1004 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1005 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1006 PetscInt max = 1,mbs = mat->mbs,tmp; 1007 for (i=0; i<mbs; i++) { 1008 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1009 if (max < tmp) { max = tmp; } 1010 } 1011 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1012 } 1013 1014 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1015 lrow = row - brstart; /* local row index */ 1016 1017 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1018 if (!v) {pvA = 0; pvB = 0;} 1019 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1020 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1021 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1022 nztot = nzA + nzB; 1023 1024 cmap = mat->garray; 1025 if (v || idx) { 1026 if (nztot) { 1027 /* Sort by increasing column numbers, assuming A and B already sorted */ 1028 PetscInt imark = -1; 1029 if (v) { 1030 *v = v_p = mat->rowvalues; 1031 for (i=0; i<nzB; i++) { 1032 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1033 else break; 1034 } 1035 imark = i; 1036 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1037 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1038 } 1039 if (idx) { 1040 *idx = idx_p = mat->rowindices; 1041 if (imark > -1) { 1042 for (i=0; i<imark; i++) { 1043 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1044 } 1045 } else { 1046 for (i=0; i<nzB; i++) { 1047 if (cmap[cworkB[i]/bs] < cstart) 1048 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1049 else break; 1050 } 1051 imark = i; 1052 } 1053 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1054 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1055 } 1056 } else { 1057 if (idx) *idx = 0; 1058 if (v) *v = 0; 1059 } 1060 } 1061 *nz = nztot; 1062 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1063 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1069 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1070 { 1071 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1072 1073 PetscFunctionBegin; 1074 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1075 baij->getrowactive = PETSC_FALSE; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1081 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1082 { 1083 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1084 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1085 1086 PetscFunctionBegin; 1087 aA->getrow_utriangular = PETSC_TRUE; 1088 PetscFunctionReturn(0); 1089 } 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1092 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1093 { 1094 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1095 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1096 1097 PetscFunctionBegin; 1098 aA->getrow_utriangular = PETSC_FALSE; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1104 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1105 { 1106 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1111 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1117 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1118 { 1119 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1124 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1130 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1131 { 1132 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1137 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1143 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1144 { 1145 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1146 Mat A = a->A,B = a->B; 1147 PetscErrorCode ierr; 1148 PetscReal isend[5],irecv[5]; 1149 1150 PetscFunctionBegin; 1151 info->block_size = (PetscReal)matin->rmap->bs; 1152 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1153 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1154 isend[3] = info->memory; isend[4] = info->mallocs; 1155 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1156 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1157 isend[3] += info->memory; isend[4] += info->mallocs; 1158 if (flag == MAT_LOCAL) { 1159 info->nz_used = isend[0]; 1160 info->nz_allocated = isend[1]; 1161 info->nz_unneeded = isend[2]; 1162 info->memory = isend[3]; 1163 info->mallocs = isend[4]; 1164 } else if (flag == MAT_GLOBAL_MAX) { 1165 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1166 info->nz_used = irecv[0]; 1167 info->nz_allocated = irecv[1]; 1168 info->nz_unneeded = irecv[2]; 1169 info->memory = irecv[3]; 1170 info->mallocs = irecv[4]; 1171 } else if (flag == MAT_GLOBAL_SUM) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else { 1179 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1180 } 1181 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1182 info->fill_ratio_needed = 0; 1183 info->factor_mallocs = 0; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1189 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1190 { 1191 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1192 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 switch (op) { 1197 case MAT_NEW_NONZERO_LOCATIONS: 1198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1199 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1200 case MAT_KEEP_NONZERO_PATTERN: 1201 case MAT_NEW_NONZERO_LOCATION_ERR: 1202 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1203 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1204 break; 1205 case MAT_ROW_ORIENTED: 1206 a->roworiented = flg; 1207 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1208 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1209 break; 1210 case MAT_NEW_DIAGONALS: 1211 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1212 break; 1213 case MAT_IGNORE_OFF_PROC_ENTRIES: 1214 a->donotstash = flg; 1215 break; 1216 case MAT_USE_HASH_TABLE: 1217 a->ht_flag = flg; 1218 break; 1219 case MAT_HERMITIAN: 1220 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1221 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1222 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1223 break; 1224 case MAT_SPD: 1225 A->spd_set = PETSC_TRUE; 1226 A->spd = flg; 1227 if (flg) { 1228 A->symmetric = PETSC_TRUE; 1229 A->structurally_symmetric = PETSC_TRUE; 1230 A->symmetric_set = PETSC_TRUE; 1231 A->structurally_symmetric_set = PETSC_TRUE; 1232 } 1233 break; 1234 case MAT_SYMMETRIC: 1235 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1236 break; 1237 case MAT_STRUCTURALLY_SYMMETRIC: 1238 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1239 break; 1240 case MAT_SYMMETRY_ETERNAL: 1241 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1242 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1243 break; 1244 case MAT_IGNORE_LOWER_TRIANGULAR: 1245 aA->ignore_ltriangular = flg; 1246 break; 1247 case MAT_ERROR_LOWER_TRIANGULAR: 1248 aA->ignore_ltriangular = flg; 1249 break; 1250 case MAT_GETROW_UPPERTRIANGULAR: 1251 aA->getrow_utriangular = flg; 1252 break; 1253 default: 1254 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1261 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1262 { 1263 PetscErrorCode ierr; 1264 PetscFunctionBegin; 1265 if (MAT_INITIAL_MATRIX || *B != A) { 1266 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1267 } 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1273 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1274 { 1275 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1276 Mat a=baij->A, b=baij->B; 1277 PetscErrorCode ierr; 1278 PetscInt nv,m,n; 1279 PetscBool flg; 1280 1281 PetscFunctionBegin; 1282 if (ll != rr){ 1283 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1284 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1285 } 1286 if (!ll) PetscFunctionReturn(0); 1287 1288 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1289 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1290 1291 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1292 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1293 1294 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1295 1296 /* left diagonalscale the off-diagonal part */ 1297 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1298 1299 /* scale the diagonal part */ 1300 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1301 1302 /* right diagonalscale the off-diagonal part */ 1303 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1310 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1311 { 1312 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatEqual_MPISBAIJ" 1324 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1325 { 1326 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1327 Mat a,b,c,d; 1328 PetscBool flg; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 a = matA->A; b = matA->B; 1333 c = matB->A; d = matB->B; 1334 1335 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1336 if (flg) { 1337 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1338 } 1339 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatCopy_MPISBAIJ" 1345 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1346 { 1347 PetscErrorCode ierr; 1348 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1349 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1350 1351 PetscFunctionBegin; 1352 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1353 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1354 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1355 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1356 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1357 } else { 1358 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1359 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1360 } 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1366 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1367 { 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1377 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1378 { 1379 PetscErrorCode ierr; 1380 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1381 PetscBLASInt bnz,one=1; 1382 Mat_SeqSBAIJ *xa,*ya; 1383 Mat_SeqBAIJ *xb,*yb; 1384 1385 PetscFunctionBegin; 1386 if (str == SAME_NONZERO_PATTERN) { 1387 PetscScalar alpha = a; 1388 xa = (Mat_SeqSBAIJ *)xx->A->data; 1389 ya = (Mat_SeqSBAIJ *)yy->A->data; 1390 bnz = PetscBLASIntCast(xa->nz); 1391 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1392 xb = (Mat_SeqBAIJ *)xx->B->data; 1393 yb = (Mat_SeqBAIJ *)yy->B->data; 1394 bnz = PetscBLASIntCast(xb->nz); 1395 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1396 } else { 1397 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1398 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1399 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ" 1406 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs) 1407 { 1408 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1409 PetscInt rbs,cbs; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1414 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1415 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1416 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1417 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs); 1418 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1424 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1425 { 1426 PetscErrorCode ierr; 1427 PetscInt i; 1428 PetscBool flg; 1429 1430 PetscFunctionBegin; 1431 for (i=0; i<n; i++) { 1432 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1433 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1434 } 1435 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 1440 /* -------------------------------------------------------------------*/ 1441 static struct _MatOps MatOps_Values = { 1442 MatSetValues_MPISBAIJ, 1443 MatGetRow_MPISBAIJ, 1444 MatRestoreRow_MPISBAIJ, 1445 MatMult_MPISBAIJ, 1446 /* 4*/ MatMultAdd_MPISBAIJ, 1447 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1448 MatMultAdd_MPISBAIJ, 1449 0, 1450 0, 1451 0, 1452 /*10*/ 0, 1453 0, 1454 0, 1455 MatSOR_MPISBAIJ, 1456 MatTranspose_MPISBAIJ, 1457 /*15*/ MatGetInfo_MPISBAIJ, 1458 MatEqual_MPISBAIJ, 1459 MatGetDiagonal_MPISBAIJ, 1460 MatDiagonalScale_MPISBAIJ, 1461 MatNorm_MPISBAIJ, 1462 /*20*/ MatAssemblyBegin_MPISBAIJ, 1463 MatAssemblyEnd_MPISBAIJ, 1464 MatSetOption_MPISBAIJ, 1465 MatZeroEntries_MPISBAIJ, 1466 /*24*/ 0, 1467 0, 1468 0, 1469 0, 1470 0, 1471 /*29*/ MatSetUpPreallocation_MPISBAIJ, 1472 0, 1473 0, 1474 0, 1475 0, 1476 /*34*/ MatDuplicate_MPISBAIJ, 1477 0, 1478 0, 1479 0, 1480 0, 1481 /*39*/ MatAXPY_MPISBAIJ, 1482 MatGetSubMatrices_MPISBAIJ, 1483 MatIncreaseOverlap_MPISBAIJ, 1484 MatGetValues_MPISBAIJ, 1485 MatCopy_MPISBAIJ, 1486 /*44*/ 0, 1487 MatScale_MPISBAIJ, 1488 0, 1489 0, 1490 0, 1491 /*49*/ MatSetBlockSize_MPISBAIJ, 1492 0, 1493 0, 1494 0, 1495 0, 1496 /*54*/ 0, 1497 0, 1498 MatSetUnfactored_MPISBAIJ, 1499 0, 1500 MatSetValuesBlocked_MPISBAIJ, 1501 /*59*/ 0, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*64*/ 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 /*69*/ MatGetRowMaxAbs_MPISBAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*74*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*79*/ 0, 1522 0, 1523 0, 1524 0, 1525 MatLoad_MPISBAIJ, 1526 /*84*/ 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 /*89*/ 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 /*94*/ 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 /*99*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*104*/0, 1547 MatRealPart_MPISBAIJ, 1548 MatImaginaryPart_MPISBAIJ, 1549 MatGetRowUpperTriangular_MPISBAIJ, 1550 MatRestoreRowUpperTriangular_MPISBAIJ, 1551 /*109*/0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /*114*/0, 1557 0, 1558 0, 1559 0, 1560 0, 1561 /*119*/0, 1562 0, 1563 0, 1564 0 1565 }; 1566 1567 1568 EXTERN_C_BEGIN 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1571 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1572 { 1573 PetscFunctionBegin; 1574 *a = ((Mat_MPISBAIJ *)A->data)->A; 1575 PetscFunctionReturn(0); 1576 } 1577 EXTERN_C_END 1578 1579 EXTERN_C_BEGIN 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1582 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1583 { 1584 Mat_MPISBAIJ *b; 1585 PetscErrorCode ierr; 1586 PetscInt i,mbs,Mbs,newbs = PetscAbs(bs); 1587 1588 PetscFunctionBegin; 1589 if (bs < 0){ 1590 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1591 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1592 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1593 bs = PetscAbs(bs); 1594 } 1595 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 1596 bs = newbs; 1597 1598 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1599 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1600 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1601 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1602 1603 B->rmap->bs = B->cmap->bs = bs; 1604 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1605 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1606 1607 if (d_nnz) { 1608 for (i=0; i<B->rmap->n/bs; i++) { 1609 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1610 } 1611 } 1612 if (o_nnz) { 1613 for (i=0; i<B->rmap->n/bs; i++) { 1614 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1615 } 1616 } 1617 1618 b = (Mat_MPISBAIJ*)B->data; 1619 mbs = B->rmap->n/bs; 1620 Mbs = B->rmap->N/bs; 1621 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1622 1623 B->rmap->bs = bs; 1624 b->bs2 = bs*bs; 1625 b->mbs = mbs; 1626 b->nbs = mbs; 1627 b->Mbs = Mbs; 1628 b->Nbs = Mbs; 1629 1630 for (i=0; i<=b->size; i++) { 1631 b->rangebs[i] = B->rmap->range[i]/bs; 1632 } 1633 b->rstartbs = B->rmap->rstart/bs; 1634 b->rendbs = B->rmap->rend/bs; 1635 1636 b->cstartbs = B->cmap->rstart/bs; 1637 b->cendbs = B->cmap->rend/bs; 1638 1639 if (!B->preallocated) { 1640 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1641 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1642 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1643 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1644 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1645 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1646 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1647 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1648 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1649 } 1650 1651 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1652 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1653 B->preallocated = PETSC_TRUE; 1654 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1655 PetscFunctionReturn(0); 1656 } 1657 EXTERN_C_END 1658 1659 EXTERN_C_BEGIN 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1662 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1663 { 1664 PetscInt m,rstart,cstart,cend; 1665 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1666 const PetscInt *JJ=0; 1667 PetscScalar *values=0; 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 1672 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1673 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1674 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1675 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1676 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1677 m = B->rmap->n/bs; 1678 rstart = B->rmap->rstart/bs; 1679 cstart = B->cmap->rstart/bs; 1680 cend = B->cmap->rend/bs; 1681 1682 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1683 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1684 for (i=0; i<m; i++) { 1685 nz = ii[i+1] - ii[i]; 1686 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1687 nz_max = PetscMax(nz_max,nz); 1688 JJ = jj + ii[i]; 1689 for (j=0; j<nz; j++) { 1690 if (*JJ >= cstart) break; 1691 JJ++; 1692 } 1693 d = 0; 1694 for (; j<nz; j++) { 1695 if (*JJ++ >= cend) break; 1696 d++; 1697 } 1698 d_nnz[i] = d; 1699 o_nnz[i] = nz - d; 1700 } 1701 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1702 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1703 1704 values = (PetscScalar*)V; 1705 if (!values) { 1706 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1707 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1708 } 1709 for (i=0; i<m; i++) { 1710 PetscInt row = i + rstart; 1711 PetscInt ncols = ii[i+1] - ii[i]; 1712 const PetscInt *icols = jj + ii[i]; 1713 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1714 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 1717 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1718 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 EXTERN_C_BEGIN 1726 #if defined(PETSC_HAVE_MUMPS) 1727 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1728 #endif 1729 #if defined(PETSC_HAVE_SPOOLES) 1730 extern PetscErrorCode MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*); 1731 #endif 1732 #if defined(PETSC_HAVE_PASTIX) 1733 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1734 #endif 1735 EXTERN_C_END 1736 1737 /*MC 1738 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1739 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1740 the matrix is stored. 1741 1742 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1743 can call MatSetOption(Mat, MAT_HERMITIAN); 1744 1745 Options Database Keys: 1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1747 1748 Level: beginner 1749 1750 .seealso: MatCreateMPISBAIJ 1751 M*/ 1752 1753 EXTERN_C_BEGIN 1754 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*); 1755 EXTERN_C_END 1756 1757 EXTERN_C_BEGIN 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatCreate_MPISBAIJ" 1760 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1761 { 1762 Mat_MPISBAIJ *b; 1763 PetscErrorCode ierr; 1764 PetscBool flg; 1765 1766 PetscFunctionBegin; 1767 1768 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1769 B->data = (void*)b; 1770 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1771 1772 B->ops->destroy = MatDestroy_MPISBAIJ; 1773 B->ops->view = MatView_MPISBAIJ; 1774 B->assembled = PETSC_FALSE; 1775 1776 B->insertmode = NOT_SET_VALUES; 1777 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1778 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1779 1780 /* build local table of row and column ownerships */ 1781 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1782 1783 /* build cache for off array entries formed */ 1784 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1785 b->donotstash = PETSC_FALSE; 1786 b->colmap = PETSC_NULL; 1787 b->garray = PETSC_NULL; 1788 b->roworiented = PETSC_TRUE; 1789 1790 /* stuff used in block assembly */ 1791 b->barray = 0; 1792 1793 /* stuff used for matrix vector multiply */ 1794 b->lvec = 0; 1795 b->Mvctx = 0; 1796 b->slvec0 = 0; 1797 b->slvec0b = 0; 1798 b->slvec1 = 0; 1799 b->slvec1a = 0; 1800 b->slvec1b = 0; 1801 b->sMvctx = 0; 1802 1803 /* stuff for MatGetRow() */ 1804 b->rowindices = 0; 1805 b->rowvalues = 0; 1806 b->getrowactive = PETSC_FALSE; 1807 1808 /* hash table stuff */ 1809 b->ht = 0; 1810 b->hd = 0; 1811 b->ht_size = 0; 1812 b->ht_flag = PETSC_FALSE; 1813 b->ht_fact = 0; 1814 b->ht_total_ct = 0; 1815 b->ht_insert_ct = 0; 1816 1817 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1818 b->ijonly = PETSC_FALSE; 1819 1820 b->in_loc = 0; 1821 b->v_loc = 0; 1822 b->n_loc = 0; 1823 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1824 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1825 if (flg) { 1826 PetscReal fact = 1.39; 1827 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1829 if (fact <= 1.0) fact = 1.39; 1830 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1831 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1832 } 1833 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1834 1835 #if defined(PETSC_HAVE_PASTIX) 1836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1837 "MatGetFactor_mpisbaij_pastix", 1838 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1839 #endif 1840 #if defined(PETSC_HAVE_MUMPS) 1841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1842 "MatGetFactor_sbaij_mumps", 1843 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1844 #endif 1845 #if defined(PETSC_HAVE_SPOOLES) 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 1847 "MatGetFactor_mpisbaij_spooles", 1848 MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr); 1849 #endif 1850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1851 "MatStoreValues_MPISBAIJ", 1852 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1854 "MatRetrieveValues_MPISBAIJ", 1855 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1857 "MatGetDiagonalBlock_MPISBAIJ", 1858 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1860 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1861 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C", 1863 "MatMPISBAIJSetPreallocationCSR_MPISBAIJ", 1864 MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1865 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C", 1866 "MatConvert_MPISBAIJ_MPISBSTRM", 1867 MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1868 1869 B->symmetric = PETSC_TRUE; 1870 B->structurally_symmetric = PETSC_TRUE; 1871 B->symmetric_set = PETSC_TRUE; 1872 B->structurally_symmetric_set = PETSC_TRUE; 1873 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 EXTERN_C_END 1877 1878 /*MC 1879 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1880 1881 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1882 and MATMPISBAIJ otherwise. 1883 1884 Options Database Keys: 1885 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1886 1887 Level: beginner 1888 1889 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1890 M*/ 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1894 /*@C 1895 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1896 the user should preallocate the matrix storage by setting the parameters 1897 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1898 performance can be increased by more than a factor of 50. 1899 1900 Collective on Mat 1901 1902 Input Parameters: 1903 + A - the matrix 1904 . bs - size of blockk 1905 . d_nz - number of block nonzeros per block row in diagonal portion of local 1906 submatrix (same for all local rows) 1907 . d_nnz - array containing the number of block nonzeros in the various block rows 1908 in the upper triangular and diagonal part of the in diagonal portion of the local 1909 (possibly different for each block row) or PETSC_NULL. You must leave room 1910 for the diagonal entry even if it is zero. 1911 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1912 submatrix (same for all local rows). 1913 - o_nnz - array containing the number of nonzeros in the various block rows of the 1914 off-diagonal portion of the local submatrix that is right of the diagonal 1915 (possibly different for each block row) or PETSC_NULL. 1916 1917 1918 Options Database Keys: 1919 . -mat_no_unroll - uses code that does not unroll the loops in the 1920 block calculations (much slower) 1921 . -mat_block_size - size of the blocks to use 1922 1923 Notes: 1924 1925 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1926 than it must be used on all processors that share the object for that argument. 1927 1928 If the *_nnz parameter is given then the *_nz parameter is ignored 1929 1930 Storage Information: 1931 For a square global matrix we define each processor's diagonal portion 1932 to be its local rows and the corresponding columns (a square submatrix); 1933 each processor's off-diagonal portion encompasses the remainder of the 1934 local matrix (a rectangular submatrix). 1935 1936 The user can specify preallocated storage for the diagonal part of 1937 the local submatrix with either d_nz or d_nnz (not both). Set 1938 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1939 memory allocation. Likewise, specify preallocated storage for the 1940 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1941 1942 You can call MatGetInfo() to get information on how effective the preallocation was; 1943 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1944 You can also run with the option -info and look for messages with the string 1945 malloc in them to see if additional memory allocation was needed. 1946 1947 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1948 the figure below we depict these three local rows and all columns (0-11). 1949 1950 .vb 1951 0 1 2 3 4 5 6 7 8 9 10 11 1952 ------------------- 1953 row 3 | . . . d d d o o o o o o 1954 row 4 | . . . d d d o o o o o o 1955 row 5 | . . . d d d o o o o o o 1956 ------------------- 1957 .ve 1958 1959 Thus, any entries in the d locations are stored in the d (diagonal) 1960 submatrix, and any entries in the o locations are stored in the 1961 o (off-diagonal) submatrix. Note that the d matrix is stored in 1962 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1963 1964 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1965 plus the diagonal part of the d matrix, 1966 and o_nz should indicate the number of block nonzeros per row in the o matrix 1967 1968 In general, for PDE problems in which most nonzeros are near the diagonal, 1969 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1970 or you will get TERRIBLE performance; see the users' manual chapter on 1971 matrices. 1972 1973 Level: intermediate 1974 1975 .keywords: matrix, block, aij, compressed row, sparse, parallel 1976 1977 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1978 @*/ 1979 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1980 { 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1985 PetscValidType(B,1); 1986 PetscValidLogicalCollectiveInt(B,bs,2); 1987 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatCreateMPISBAIJ" 1993 /*@C 1994 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1995 (block compressed row). For good matrix assembly performance 1996 the user should preallocate the matrix storage by setting the parameters 1997 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1998 performance can be increased by more than a factor of 50. 1999 2000 Collective on MPI_Comm 2001 2002 Input Parameters: 2003 + comm - MPI communicator 2004 . bs - size of blockk 2005 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2006 This value should be the same as the local size used in creating the 2007 y vector for the matrix-vector product y = Ax. 2008 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2009 This value should be the same as the local size used in creating the 2010 x vector for the matrix-vector product y = Ax. 2011 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2012 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2013 . d_nz - number of block nonzeros per block row in diagonal portion of local 2014 submatrix (same for all local rows) 2015 . d_nnz - array containing the number of block nonzeros in the various block rows 2016 in the upper triangular portion of the in diagonal portion of the local 2017 (possibly different for each block block row) or PETSC_NULL. 2018 You must leave room for the diagonal entry even if it is zero. 2019 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2020 submatrix (same for all local rows). 2021 - o_nnz - array containing the number of nonzeros in the various block rows of the 2022 off-diagonal portion of the local submatrix (possibly different for 2023 each block row) or PETSC_NULL. 2024 2025 Output Parameter: 2026 . A - the matrix 2027 2028 Options Database Keys: 2029 . -mat_no_unroll - uses code that does not unroll the loops in the 2030 block calculations (much slower) 2031 . -mat_block_size - size of the blocks to use 2032 . -mat_mpi - use the parallel matrix data structures even on one processor 2033 (defaults to using SeqBAIJ format on one processor) 2034 2035 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2036 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2037 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2038 2039 Notes: 2040 The number of rows and columns must be divisible by blocksize. 2041 This matrix type does not support complex Hermitian operation. 2042 2043 The user MUST specify either the local or global matrix dimensions 2044 (possibly both). 2045 2046 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2047 than it must be used on all processors that share the object for that argument. 2048 2049 If the *_nnz parameter is given then the *_nz parameter is ignored 2050 2051 Storage Information: 2052 For a square global matrix we define each processor's diagonal portion 2053 to be its local rows and the corresponding columns (a square submatrix); 2054 each processor's off-diagonal portion encompasses the remainder of the 2055 local matrix (a rectangular submatrix). 2056 2057 The user can specify preallocated storage for the diagonal part of 2058 the local submatrix with either d_nz or d_nnz (not both). Set 2059 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2060 memory allocation. Likewise, specify preallocated storage for the 2061 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2062 2063 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2064 the figure below we depict these three local rows and all columns (0-11). 2065 2066 .vb 2067 0 1 2 3 4 5 6 7 8 9 10 11 2068 ------------------- 2069 row 3 | . . . d d d o o o o o o 2070 row 4 | . . . d d d o o o o o o 2071 row 5 | . . . d d d o o o o o o 2072 ------------------- 2073 .ve 2074 2075 Thus, any entries in the d locations are stored in the d (diagonal) 2076 submatrix, and any entries in the o locations are stored in the 2077 o (off-diagonal) submatrix. Note that the d matrix is stored in 2078 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2079 2080 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2081 plus the diagonal part of the d matrix, 2082 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2083 In general, for PDE problems in which most nonzeros are near the diagonal, 2084 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2085 or you will get TERRIBLE performance; see the users' manual chapter on 2086 matrices. 2087 2088 Level: intermediate 2089 2090 .keywords: matrix, block, aij, compressed row, sparse, parallel 2091 2092 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2093 @*/ 2094 2095 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2096 { 2097 PetscErrorCode ierr; 2098 PetscMPIInt size; 2099 2100 PetscFunctionBegin; 2101 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2102 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2103 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2104 if (size > 1) { 2105 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2106 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2107 } else { 2108 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2109 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2110 } 2111 PetscFunctionReturn(0); 2112 } 2113 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2117 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2118 { 2119 Mat mat; 2120 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2121 PetscErrorCode ierr; 2122 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2123 PetscScalar *array; 2124 2125 PetscFunctionBegin; 2126 *newmat = 0; 2127 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2128 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2129 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2130 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2131 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2132 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2133 2134 mat->factortype = matin->factortype; 2135 mat->preallocated = PETSC_TRUE; 2136 mat->assembled = PETSC_TRUE; 2137 mat->insertmode = NOT_SET_VALUES; 2138 2139 a = (Mat_MPISBAIJ*)mat->data; 2140 a->bs2 = oldmat->bs2; 2141 a->mbs = oldmat->mbs; 2142 a->nbs = oldmat->nbs; 2143 a->Mbs = oldmat->Mbs; 2144 a->Nbs = oldmat->Nbs; 2145 2146 2147 a->size = oldmat->size; 2148 a->rank = oldmat->rank; 2149 a->donotstash = oldmat->donotstash; 2150 a->roworiented = oldmat->roworiented; 2151 a->rowindices = 0; 2152 a->rowvalues = 0; 2153 a->getrowactive = PETSC_FALSE; 2154 a->barray = 0; 2155 a->rstartbs = oldmat->rstartbs; 2156 a->rendbs = oldmat->rendbs; 2157 a->cstartbs = oldmat->cstartbs; 2158 a->cendbs = oldmat->cendbs; 2159 2160 /* hash table stuff */ 2161 a->ht = 0; 2162 a->hd = 0; 2163 a->ht_size = 0; 2164 a->ht_flag = oldmat->ht_flag; 2165 a->ht_fact = oldmat->ht_fact; 2166 a->ht_total_ct = 0; 2167 a->ht_insert_ct = 0; 2168 2169 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2170 if (oldmat->colmap) { 2171 #if defined (PETSC_USE_CTABLE) 2172 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2173 #else 2174 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2175 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2176 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2177 #endif 2178 } else a->colmap = 0; 2179 2180 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2181 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2182 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2183 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2184 } else a->garray = 0; 2185 2186 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2187 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2188 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2189 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2190 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2191 2192 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2193 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2194 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2195 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2196 2197 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2198 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2199 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2200 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2201 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2202 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2203 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2204 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2205 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2206 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2207 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2208 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2209 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2210 2211 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2212 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2213 a->sMvctx = oldmat->sMvctx; 2214 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2215 2216 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2217 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2218 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2219 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2220 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2221 *newmat = mat; 2222 PetscFunctionReturn(0); 2223 } 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "MatLoad_MPISBAIJ" 2227 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2228 { 2229 PetscErrorCode ierr; 2230 PetscInt i,nz,j,rstart,rend; 2231 PetscScalar *vals,*buf; 2232 MPI_Comm comm = ((PetscObject)viewer)->comm; 2233 MPI_Status status; 2234 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs; 2235 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2236 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2237 PetscInt bs=1,Mbs,mbs,extra_rows; 2238 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2239 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2240 int fd; 2241 2242 PetscFunctionBegin; 2243 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2244 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2245 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2246 2247 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2249 if (!rank) { 2250 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2251 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2252 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2253 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2254 } 2255 2256 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2257 2258 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2259 M = header[1]; N = header[2]; 2260 2261 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2262 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2263 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2264 2265 /* If global sizes are set, check if they are consistent with that given in the file */ 2266 if (sizesset) { 2267 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2268 } 2269 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2270 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2271 2272 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2273 2274 /* 2275 This code adds extra rows to make sure the number of rows is 2276 divisible by the blocksize 2277 */ 2278 Mbs = M/bs; 2279 extra_rows = bs - M + bs*(Mbs); 2280 if (extra_rows == bs) extra_rows = 0; 2281 else Mbs++; 2282 if (extra_rows &&!rank) { 2283 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2284 } 2285 2286 /* determine ownership of all rows */ 2287 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2288 mbs = Mbs/size + ((Mbs % size) > rank); 2289 m = mbs*bs; 2290 } else { /* User Set */ 2291 m = newmat->rmap->n; 2292 mbs = m/bs; 2293 } 2294 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2295 mmbs = PetscMPIIntCast(mbs); 2296 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2297 rowners[0] = 0; 2298 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2299 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2300 rstart = rowners[rank]; 2301 rend = rowners[rank+1]; 2302 2303 /* distribute row lengths to all processors */ 2304 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr); 2305 if (!rank) { 2306 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2307 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2308 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2309 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2310 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2311 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2312 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2313 } else { 2314 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2315 } 2316 2317 if (!rank) { /* procs[0] */ 2318 /* calculate the number of nonzeros on each processor */ 2319 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2320 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2321 for (i=0; i<size; i++) { 2322 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2323 procsnz[i] += rowlengths[j]; 2324 } 2325 } 2326 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2327 2328 /* determine max buffer needed and allocate it */ 2329 maxnz = 0; 2330 for (i=0; i<size; i++) { 2331 maxnz = PetscMax(maxnz,procsnz[i]); 2332 } 2333 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2334 2335 /* read in my part of the matrix column indices */ 2336 nz = procsnz[0]; 2337 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2338 mycols = ibuf; 2339 if (size == 1) nz -= extra_rows; 2340 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2341 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2342 2343 /* read in every ones (except the last) and ship off */ 2344 for (i=1; i<size-1; i++) { 2345 nz = procsnz[i]; 2346 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2347 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2348 } 2349 /* read in the stuff for the last proc */ 2350 if (size != 1) { 2351 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2352 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2353 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2354 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2355 } 2356 ierr = PetscFree(cols);CHKERRQ(ierr); 2357 } else { /* procs[i], i>0 */ 2358 /* determine buffer space needed for message */ 2359 nz = 0; 2360 for (i=0; i<m; i++) { 2361 nz += locrowlens[i]; 2362 } 2363 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2364 mycols = ibuf; 2365 /* receive message of column indices*/ 2366 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2367 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2368 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2369 } 2370 2371 /* loop over local rows, determining number of off diagonal entries */ 2372 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2373 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2374 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2375 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2376 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2377 rowcount = 0; 2378 nzcount = 0; 2379 for (i=0; i<mbs; i++) { 2380 dcount = 0; 2381 odcount = 0; 2382 for (j=0; j<bs; j++) { 2383 kmax = locrowlens[rowcount]; 2384 for (k=0; k<kmax; k++) { 2385 tmp = mycols[nzcount++]/bs; /* block col. index */ 2386 if (!mask[tmp]) { 2387 mask[tmp] = 1; 2388 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2389 else masked1[dcount++] = tmp; /* entry in diag portion */ 2390 } 2391 } 2392 rowcount++; 2393 } 2394 2395 dlens[i] = dcount; /* d_nzz[i] */ 2396 odlens[i] = odcount; /* o_nzz[i] */ 2397 2398 /* zero out the mask elements we set */ 2399 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2400 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2401 } 2402 if (!sizesset) { 2403 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2404 } 2405 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2406 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2407 2408 if (!rank) { 2409 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2410 /* read in my part of the matrix numerical values */ 2411 nz = procsnz[0]; 2412 vals = buf; 2413 mycols = ibuf; 2414 if (size == 1) nz -= extra_rows; 2415 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2416 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2417 2418 /* insert into matrix */ 2419 jj = rstart*bs; 2420 for (i=0; i<m; i++) { 2421 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2422 mycols += locrowlens[i]; 2423 vals += locrowlens[i]; 2424 jj++; 2425 } 2426 2427 /* read in other processors (except the last one) and ship out */ 2428 for (i=1; i<size-1; i++) { 2429 nz = procsnz[i]; 2430 vals = buf; 2431 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2432 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2433 } 2434 /* the last proc */ 2435 if (size != 1){ 2436 nz = procsnz[i] - extra_rows; 2437 vals = buf; 2438 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2439 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2440 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2441 } 2442 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2443 2444 } else { 2445 /* receive numeric values */ 2446 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2447 2448 /* receive message of values*/ 2449 vals = buf; 2450 mycols = ibuf; 2451 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2452 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2453 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2454 2455 /* insert into matrix */ 2456 jj = rstart*bs; 2457 for (i=0; i<m; i++) { 2458 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2459 mycols += locrowlens[i]; 2460 vals += locrowlens[i]; 2461 jj++; 2462 } 2463 } 2464 2465 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2466 ierr = PetscFree(buf);CHKERRQ(ierr); 2467 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2468 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2469 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2470 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2471 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2472 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 #undef __FUNCT__ 2477 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2478 /*XXXXX@ 2479 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2480 2481 Input Parameters: 2482 . mat - the matrix 2483 . fact - factor 2484 2485 Not Collective on Mat, each process can have a different hash factor 2486 2487 Level: advanced 2488 2489 Notes: 2490 This can also be set by the command line option: -mat_use_hash_table fact 2491 2492 .keywords: matrix, hashtable, factor, HT 2493 2494 .seealso: MatSetOption() 2495 @XXXXX*/ 2496 2497 2498 #undef __FUNCT__ 2499 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2500 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2501 { 2502 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2503 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2504 PetscReal atmp; 2505 PetscReal *work,*svalues,*rvalues; 2506 PetscErrorCode ierr; 2507 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2508 PetscMPIInt rank,size; 2509 PetscInt *rowners_bs,dest,count,source; 2510 PetscScalar *va; 2511 MatScalar *ba; 2512 MPI_Status stat; 2513 2514 PetscFunctionBegin; 2515 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2516 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2517 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2518 2519 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2520 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2521 2522 bs = A->rmap->bs; 2523 mbs = a->mbs; 2524 Mbs = a->Mbs; 2525 ba = b->a; 2526 bi = b->i; 2527 bj = b->j; 2528 2529 /* find ownerships */ 2530 rowners_bs = A->rmap->range; 2531 2532 /* each proc creates an array to be distributed */ 2533 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2534 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2535 2536 /* row_max for B */ 2537 if (rank != size-1){ 2538 for (i=0; i<mbs; i++) { 2539 ncols = bi[1] - bi[0]; bi++; 2540 brow = bs*i; 2541 for (j=0; j<ncols; j++){ 2542 bcol = bs*(*bj); 2543 for (kcol=0; kcol<bs; kcol++){ 2544 col = bcol + kcol; /* local col index */ 2545 col += rowners_bs[rank+1]; /* global col index */ 2546 for (krow=0; krow<bs; krow++){ 2547 atmp = PetscAbsScalar(*ba); ba++; 2548 row = brow + krow; /* local row index */ 2549 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2550 if (work[col] < atmp) work[col] = atmp; 2551 } 2552 } 2553 bj++; 2554 } 2555 } 2556 2557 /* send values to its owners */ 2558 for (dest=rank+1; dest<size; dest++){ 2559 svalues = work + rowners_bs[dest]; 2560 count = rowners_bs[dest+1]-rowners_bs[dest]; 2561 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2562 } 2563 } 2564 2565 /* receive values */ 2566 if (rank){ 2567 rvalues = work; 2568 count = rowners_bs[rank+1]-rowners_bs[rank]; 2569 for (source=0; source<rank; source++){ 2570 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2571 /* process values */ 2572 for (i=0; i<count; i++){ 2573 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2574 } 2575 } 2576 } 2577 2578 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2579 ierr = PetscFree(work);CHKERRQ(ierr); 2580 PetscFunctionReturn(0); 2581 } 2582 2583 #undef __FUNCT__ 2584 #define __FUNCT__ "MatSOR_MPISBAIJ" 2585 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2586 { 2587 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2588 PetscErrorCode ierr; 2589 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2590 PetscScalar *x,*ptr,*from; 2591 Vec bb1; 2592 const PetscScalar *b; 2593 2594 PetscFunctionBegin; 2595 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2596 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2597 2598 if (flag == SOR_APPLY_UPPER) { 2599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2604 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2605 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2606 its--; 2607 } 2608 2609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2610 while (its--){ 2611 2612 /* lower triangular part: slvec0b = - B^T*xx */ 2613 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2614 2615 /* copy xx into slvec0a */ 2616 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2617 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2618 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2619 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2620 2621 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2622 2623 /* copy bb into slvec1a */ 2624 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2625 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2626 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2627 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2628 2629 /* set slvec1b = 0 */ 2630 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2631 2632 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2633 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2634 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2635 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2636 2637 /* upper triangular part: bb1 = bb1 - B*x */ 2638 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2639 2640 /* local diagonal sweep */ 2641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2642 } 2643 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2644 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2646 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){ 2647 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2648 } else if (flag & SOR_EISENSTAT) { 2649 Vec xx1; 2650 PetscBool hasop; 2651 const PetscScalar *diag; 2652 PetscScalar *sl,scale = (omega - 2.0)/omega; 2653 PetscInt i,n; 2654 2655 if (!mat->xx1) { 2656 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2657 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2658 } 2659 xx1 = mat->xx1; 2660 bb1 = mat->bb1; 2661 2662 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2663 2664 if (!mat->diag) { 2665 /* this is wrong for same matrix with new nonzero values */ 2666 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2667 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2668 } 2669 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2670 2671 if (hasop) { 2672 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2673 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2674 } else { 2675 /* 2676 These two lines are replaced by code that may be a bit faster for a good compiler 2677 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2678 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2679 */ 2680 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2681 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2682 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2683 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2684 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2685 if (omega == 1.0) { 2686 for (i=0; i<n; i++) { 2687 sl[i] = b[i] - diag[i]*x[i]; 2688 } 2689 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2690 } else { 2691 for (i=0; i<n; i++) { 2692 sl[i] = b[i] + scale*diag[i]*x[i]; 2693 } 2694 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2695 } 2696 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2697 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2698 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2700 } 2701 2702 /* multiply off-diagonal portion of matrix */ 2703 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2704 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2705 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2706 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2707 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2708 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2709 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2710 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2711 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2712 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2713 2714 /* local sweep */ 2715 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2716 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2717 } else { 2718 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2719 } 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2725 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2726 { 2727 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2728 PetscErrorCode ierr; 2729 Vec lvec1,bb1; 2730 2731 PetscFunctionBegin; 2732 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2733 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2734 2735 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2736 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2737 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2738 its--; 2739 } 2740 2741 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2742 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2743 while (its--){ 2744 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2745 2746 /* lower diagonal part: bb1 = bb - B^T*xx */ 2747 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2748 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2749 2750 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2751 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2752 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2753 2754 /* upper diagonal part: bb1 = bb1 - B*x */ 2755 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2756 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2757 2758 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2759 2760 /* diagonal sweep */ 2761 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2762 } 2763 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2764 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2765 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2771 /*@ 2772 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2773 CSR format the local rows. 2774 2775 Collective on MPI_Comm 2776 2777 Input Parameters: 2778 + comm - MPI communicator 2779 . bs - the block size, only a block size of 1 is supported 2780 . m - number of local rows (Cannot be PETSC_DECIDE) 2781 . n - This value should be the same as the local size used in creating the 2782 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2783 calculated if N is given) For square matrices n is almost always m. 2784 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2785 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2786 . i - row indices 2787 . j - column indices 2788 - a - matrix values 2789 2790 Output Parameter: 2791 . mat - the matrix 2792 2793 Level: intermediate 2794 2795 Notes: 2796 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2797 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2798 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2799 2800 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2801 2802 .keywords: matrix, aij, compressed row, sparse, parallel 2803 2804 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2805 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 2806 @*/ 2807 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2808 { 2809 PetscErrorCode ierr; 2810 2811 2812 PetscFunctionBegin; 2813 if (i[0]) { 2814 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2815 } 2816 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2817 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2818 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2819 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2820 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2821 PetscFunctionReturn(0); 2822 } 2823 2824 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2827 /*@C 2828 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2829 (the default parallel PETSc format). 2830 2831 Collective on MPI_Comm 2832 2833 Input Parameters: 2834 + A - the matrix 2835 . bs - the block size 2836 . i - the indices into j for the start of each local row (starts with zero) 2837 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2838 - v - optional values in the matrix 2839 2840 Level: developer 2841 2842 .keywords: matrix, aij, compressed row, sparse, parallel 2843 2844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2845 @*/ 2846 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2847 { 2848 PetscErrorCode ierr; 2849 2850 PetscFunctionBegin; 2851 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2852 PetscFunctionReturn(0); 2853 } 2854 2855 2856