#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
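
   Schematically (an illustration of the conversion path described above, not
   the exact call chain), with PETSC_USE_MAT_SINGLE defined:

       PetscScalar v[2] = {1.0,2.0};                  user values, double precision
       MatSetValues(mat,1,rows,2,cols,v,ADD_VALUES);
         -> MatSetValues_MPISBAIJ()                   copies v[] into a MatScalar (single precision) buffer
         -> MatSetValues_MPISBAIJ_MatScalar()         inserts the converted values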
30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 112 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 113 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) 
{new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 #ifndef MatSetValues_SeqBAIJ_B_Private 155 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 188 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 189 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 198 ierr = 
PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 #endif 231 232 #if defined(PETSC_USE_MAT_SINGLE) 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValues_MPISBAIJ" 235 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 238 PetscErrorCode ierr; 239 PetscInt i,N = m*n; 240 MatScalar *vsingle; 241 242 PetscFunctionBegin; 243 if (N > b->setvalueslen) { 244 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 245 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 246 b->setvalueslen = N; 247 } 248 vsingle = b->setvaluescopy; 249 250 for (i=0; i<N; i++) { 251 vsingle[i] = v[i]; 252 } 253 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 254 PetscFunctionReturn(0); 255 } 256 257 #undef __FUNCT__ 258 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 259 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 260 { 261 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 262 PetscErrorCode ierr; 263 PetscInt i,N = m*n*b->bs2; 264 MatScalar *vsingle; 265 266 PetscFunctionBegin; 267 if (N > b->setvalueslen) { 268 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 269 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 270 b->setvalueslen = N; 271 } 272 vsingle = b->setvaluescopy; 273 for (i=0; i<N; i++) { 274 vsingle[i] = v[i]; 275 } 276 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 #endif 280 281 /* Only add/insert a(i,j) with i<=j (blocks). 282 Any a(i,j) with i>j input by user is ingored. 
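
   For example (schematic, with bs = 1 so blocks and scalar entries coincide):

       PetscInt    row    = 3;
       PetscInt    cols[] = {1, 3, 5};
       PetscScalar vals[] = {0.5, 2.0, -1.0};
       MatSetValues(mat,1,&row,3,cols,vals,INSERT_VALUES);

   stores (3,3) and (3,5) only; (3,1) lies in the lower triangle and is
   skipped by the "im[i]/bs > in[j]/bs" test below.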
283 */ 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 286 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 287 { 288 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 289 MatScalar value; 290 PetscTruth roworiented = baij->roworiented; 291 PetscErrorCode ierr; 292 PetscInt i,j,row,col; 293 PetscInt rstart_orig=baij->rstart_bs; 294 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 295 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 296 297 /* Some Variables required in the macro */ 298 Mat A = baij->A; 299 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 300 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 301 MatScalar *aa=a->a; 302 303 Mat B = baij->B; 304 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 305 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 306 MatScalar *ba=b->a; 307 308 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 309 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 310 MatScalar *ap,*bap; 311 312 /* for stash */ 313 PetscInt n_loc, *in_loc=0; 314 MatScalar *v_loc=0; 315 316 PetscFunctionBegin; 317 318 if(!baij->donotstash){ 319 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 320 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 321 } 322 323 for (i=0; i<m; i++) { 324 if (im[i] < 0) continue; 325 #if defined(PETSC_USE_DEBUG) 326 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 327 #endif 328 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 329 row = im[i] - rstart_orig; /* local row index */ 330 for (j=0; j<n; j++) { 331 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 332 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 333 col = in[j] - cstart_orig; /* local col index */ 334 brow = row/bs; bcol = col/bs; 335 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 336 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 337 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 338 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 339 } else if (in[j] < 0) continue; 340 #if defined(PETSC_USE_DEBUG) 341 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 342 #endif 343 else { /* off-diag entry (B) */ 344 if (mat->was_assembled) { 345 if (!baij->colmap) { 346 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 347 } 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 350 col = col - 1; 351 #else 352 col = baij->colmap[in[j]/bs] - 1; 353 #endif 354 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 355 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 358 B = baij->B; 359 b = (Mat_SeqBAIJ*)(B)->data; 360 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 361 ba=b->a; 362 } else col += in[j]%bs; 363 } else col = in[j]; 364 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 365 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 366 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 367 } 368 } 369 } else { /* off processor entry */ 370 if (!baij->donotstash) { 371 n_loc = 0; 372 for (j=0; j<n; j++){ 373 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 374 
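          /* buffer the column index and value of this off-processor entry; the stash
             (mat->stash) is communicated to the owning process during
             MatAssemblyBegin()/MatAssemblyEnd() via MatStashValuesRow_Private() below */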
in_loc[n_loc] = in[j]; 375 if (roworiented) { 376 v_loc[n_loc] = v[i*n+j]; 377 } else { 378 v_loc[n_loc] = v[j*m+i]; 379 } 380 n_loc++; 381 } 382 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 383 } 384 } 385 } 386 387 if(!baij->donotstash){ 388 ierr = PetscFree(in_loc);CHKERRQ(ierr); 389 ierr = PetscFree(v_loc);CHKERRQ(ierr); 390 } 391 PetscFunctionReturn(0); 392 } 393 394 #undef __FUNCT__ 395 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 396 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 397 { 398 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 399 const MatScalar *value; 400 MatScalar *barray=baij->barray; 401 PetscTruth roworiented = baij->roworiented; 402 PetscErrorCode ierr; 403 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 404 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 405 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 406 407 PetscFunctionBegin; 408 if(!barray) { 409 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 410 baij->barray = barray; 411 } 412 413 if (roworiented) { 414 stepval = (n-1)*bs; 415 } else { 416 stepval = (m-1)*bs; 417 } 418 for (i=0; i<m; i++) { 419 if (im[i] < 0) continue; 420 #if defined(PETSC_USE_DEBUG) 421 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 422 #endif 423 if (im[i] >= rstart && im[i] < rend) { 424 row = im[i] - rstart; 425 for (j=0; j<n; j++) { 426 /* If NumCol = 1 then a copy is not required */ 427 if ((roworiented) && (n == 1)) { 428 barray = (MatScalar*) v + i*bs2; 429 } else if((!roworiented) && (m == 1)) { 430 barray = (MatScalar*) v + j*bs2; 431 } else { /* Here a copy is required */ 432 if (roworiented) { 433 value = v + i*(stepval+bs)*bs + j*bs; 434 } else { 435 value = v + j*(stepval+bs)*bs + i*bs; 436 } 437 for (ii=0; ii<bs; ii++,value+=stepval) { 438 for (jj=0; jj<bs; jj++) { 439 *barray++ = *value++; 440 } 441 } 442 barray -=bs2; 443 } 444 445 if (in[j] >= cstart && in[j] < cend){ 446 col = in[j] - cstart; 447 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 448 } 449 else if (in[j] < 0) continue; 450 #if defined(PETSC_USE_DEBUG) 451 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 452 #endif 453 else { 454 if (mat->was_assembled) { 455 if (!baij->colmap) { 456 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 457 } 458 459 #if defined(PETSC_USE_DEBUG) 460 #if defined (PETSC_USE_CTABLE) 461 { PetscInt data; 462 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 463 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 464 } 465 #else 466 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 467 #endif 468 #endif 469 #if defined (PETSC_USE_CTABLE) 470 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 471 col = (col - 1)/bs; 472 #else 473 col = (baij->colmap[in[j]] - 1)/bs; 474 #endif 475 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 476 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 477 col = in[j]; 478 } 479 } 480 else col = in[j]; 481 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 482 } 483 } 484 } else { 485 if (!baij->donotstash) { 486 if (roworiented) { 487 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 488 } 
else { 489 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 490 } 491 } 492 } 493 } 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatGetValues_MPISBAIJ" 499 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 500 { 501 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 502 PetscErrorCode ierr; 503 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 504 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 505 506 PetscFunctionBegin; 507 for (i=0; i<m; i++) { 508 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 509 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 510 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 511 row = idxm[i] - bsrstart; 512 for (j=0; j<n; j++) { 513 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 514 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 515 if (idxn[j] >= bscstart && idxn[j] < bscend){ 516 col = idxn[j] - bscstart; 517 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 518 } else { 519 if (!baij->colmap) { 520 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 521 } 522 #if defined (PETSC_USE_CTABLE) 523 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 524 data --; 525 #else 526 data = baij->colmap[idxn[j]/bs]-1; 527 #endif 528 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 529 else { 530 col = data + idxn[j]%bs; 531 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 532 } 533 } 534 } 535 } else { 536 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 537 } 538 } 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatNorm_MPISBAIJ" 544 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 545 { 546 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 547 PetscErrorCode ierr; 548 PetscReal sum[2],*lnorm2; 549 550 PetscFunctionBegin; 551 if (baij->size == 1) { 552 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 553 } else { 554 if (type == NORM_FROBENIUS) { 555 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 556 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 557 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 558 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 559 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 560 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 561 *norm = sqrt(sum[0] + 2*sum[1]); 562 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 563 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 564 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 565 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 566 PetscReal *rsum,*rsum2,vabs; 567 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 568 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 569 MatScalar *v; 570 571 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 572 rsum2 = rsum + mat->N; 573 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 574 /* Amat */ 575 v = amat->a; jj = amat->j; 576 for (brow=0; brow<mbs; brow++) { 577 grow = bs*(rstart + brow); 578 nz = amat->i[brow+1] - amat->i[brow]; 579 for 
(bcol=0; bcol<nz; bcol++){ 580 gcol = bs*(rstart + *jj); jj++; 581 for (col=0; col<bs; col++){ 582 for (row=0; row<bs; row++){ 583 vabs = PetscAbsScalar(*v); v++; 584 rsum[gcol+col] += vabs; 585 /* non-diagonal block */ 586 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 587 } 588 } 589 } 590 } 591 /* Bmat */ 592 v = bmat->a; jj = bmat->j; 593 for (brow=0; brow<mbs; brow++) { 594 grow = bs*(rstart + brow); 595 nz = bmat->i[brow+1] - bmat->i[brow]; 596 for (bcol=0; bcol<nz; bcol++){ 597 gcol = bs*garray[*jj]; jj++; 598 for (col=0; col<bs; col++){ 599 for (row=0; row<bs; row++){ 600 vabs = PetscAbsScalar(*v); v++; 601 rsum[gcol+col] += vabs; 602 rsum[grow+row] += vabs; 603 } 604 } 605 } 606 } 607 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 608 *norm = 0.0; 609 for (col=0; col<mat->N; col++) { 610 if (rsum2[col] > *norm) *norm = rsum2[col]; 611 } 612 ierr = PetscFree(rsum);CHKERRQ(ierr); 613 } else { 614 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 615 } 616 } 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 622 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 623 { 624 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 625 PetscErrorCode ierr; 626 PetscInt nstash,reallocs; 627 InsertMode addv; 628 629 PetscFunctionBegin; 630 if (baij->donotstash) { 631 PetscFunctionReturn(0); 632 } 633 634 /* make sure all processors are either in INSERTMODE or ADDMODE */ 635 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 636 if (addv == (ADD_VALUES|INSERT_VALUES)) { 637 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 638 } 639 mat->insertmode = addv; /* in case this processor had no cache */ 640 641 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 642 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 643 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 644 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 645 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 646 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 652 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 655 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 656 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 657 PetscErrorCode ierr; 658 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 659 PetscInt *row,*col,other_disassembled; 660 PetscMPIInt n; 661 PetscTruth r1,r2,r3; 662 MatScalar *val; 663 InsertMode addv = mat->insertmode; 664 665 PetscFunctionBegin; 666 667 if (!baij->donotstash) { 668 while (1) { 669 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 670 if (!flg) break; 671 672 for (i=0; i<n;) { 673 /* Now identify the consecutive vals belonging to the same row */ 674 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 675 if (j < n) ncols = j-i; 676 else ncols = n-i; 677 /* Now assemble all these values with a single function call */ 678 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 679 i = j; 680 } 681 } 682 ierr = 
MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 683 /* Now process the block-stash. Since the values are stashed column-oriented, 684 set the roworiented flag to column oriented, and after MatSetValues() 685 restore the original flags */ 686 r1 = baij->roworiented; 687 r2 = a->roworiented; 688 r3 = b->roworiented; 689 baij->roworiented = PETSC_FALSE; 690 a->roworiented = PETSC_FALSE; 691 b->roworiented = PETSC_FALSE; 692 while (1) { 693 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 694 if (!flg) break; 695 696 for (i=0; i<n;) { 697 /* Now identify the consecutive vals belonging to the same row */ 698 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 699 if (j < n) ncols = j-i; 700 else ncols = n-i; 701 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 702 i = j; 703 } 704 } 705 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 706 baij->roworiented = r1; 707 a->roworiented = r2; 708 b->roworiented = r3; 709 } 710 711 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 712 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 713 714 /* determine if any processor has disassembled, if so we must 715 also disassemble ourselfs, in order that we may reassemble. */ 716 /* 717 if nonzero structure of submatrix B cannot change then we know that 718 no processor disassembled thus we can skip this stuff 719 */ 720 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 721 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 722 if (mat->was_assembled && !other_disassembled) { 723 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 724 } 725 } 726 727 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 728 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 729 } 730 b->compressedrow.use = PETSC_TRUE; 731 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 733 734 if (baij->rowvalues) { 735 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 736 baij->rowvalues = 0; 737 } 738 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 744 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 745 { 746 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt bs = mat->bs; 749 PetscMPIInt size = baij->size,rank = baij->rank; 750 PetscTruth iascii,isdraw; 751 PetscViewer sviewer; 752 PetscViewerFormat format; 753 754 PetscFunctionBegin; 755 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 756 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 757 if (iascii) { 758 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 759 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 760 MatInfo info; 761 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 762 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 763 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 764 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 765 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 766 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 767 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 768 ierr = 
MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 769 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 770 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 771 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } else if (format == PETSC_VIEWER_ASCII_INFO) { 774 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 775 PetscFunctionReturn(0); 776 } 777 } 778 779 if (isdraw) { 780 PetscDraw draw; 781 PetscTruth isnull; 782 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 783 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 784 } 785 786 if (size == 1) { 787 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 788 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 789 } else { 790 /* assemble the entire matrix onto first processor. */ 791 Mat A; 792 Mat_SeqSBAIJ *Aloc; 793 Mat_SeqBAIJ *Bloc; 794 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 795 MatScalar *a; 796 797 /* Should this be the same type as mat? */ 798 if (!rank) { 799 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 800 } else { 801 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 802 } 803 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 804 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 805 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 806 807 /* copy over the A part */ 808 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 809 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 810 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 811 812 for (i=0; i<mbs; i++) { 813 rvals[0] = bs*(baij->rstart + i); 814 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 815 for (j=ai[i]; j<ai[i+1]; j++) { 816 col = (baij->cstart+aj[j])*bs; 817 for (k=0; k<bs; k++) { 818 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 819 col++; a += bs; 820 } 821 } 822 } 823 /* copy over the B part */ 824 Bloc = (Mat_SeqBAIJ*)baij->B->data; 825 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 826 for (i=0; i<mbs; i++) { 827 rvals[0] = bs*(baij->rstart + i); 828 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 829 for (j=ai[i]; j<ai[i+1]; j++) { 830 col = baij->garray[aj[j]]*bs; 831 for (k=0; k<bs; k++) { 832 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 833 col++; a += bs; 834 } 835 } 836 } 837 ierr = PetscFree(rvals);CHKERRQ(ierr); 838 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 840 /* 841 Everyone has to call to draw the matrix since the graphics waits are 842 synchronized across all processors that share the PetscDraw object 843 */ 844 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 845 if (!rank) { 846 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 847 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 848 } 849 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 850 ierr = MatDestroy(A);CHKERRQ(ierr); 851 } 852 PetscFunctionReturn(0); 853 } 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatView_MPISBAIJ" 857 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 858 { 859 PetscErrorCode ierr; 860 PetscTruth iascii,isdraw,issocket,isbinary; 861 862 PetscFunctionBegin; 863 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 864 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 865 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 866 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 867 if (iascii || isdraw || issocket || isbinary) { 868 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 869 } else { 870 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "MatDestroy_MPISBAIJ" 877 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 878 { 879 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 880 PetscErrorCode ierr; 881 882 PetscFunctionBegin; 883 #if defined(PETSC_USE_LOG) 884 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 885 #endif 886 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 887 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 888 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 889 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 890 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 891 #if defined (PETSC_USE_CTABLE) 892 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 893 #else 894 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 895 #endif 896 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 897 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 898 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 899 if (baij->slvec0) { 900 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 901 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 902 } 903 if (baij->slvec1) { 904 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 905 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 906 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 907 } 908 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 909 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 910 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 911 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 912 #if defined(PETSC_USE_MAT_SINGLE) 913 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 914 #endif 915 ierr = PetscFree(baij);CHKERRQ(ierr); 916 917 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 919 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "MatMult_MPISBAIJ" 926 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 927 { 928 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 929 PetscErrorCode ierr; 930 PetscInt nt,mbs=a->mbs,bs=A->bs; 931 PetscScalar *x,*from,zero=0.0; 932 933 PetscFunctionBegin; 934 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 935 if (nt != A->n) { 936 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 937 } 938 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 939 if (nt != A->m) { 940 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 941 } 942 943 /* diagonal part 
*/
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
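/*
   Note on the two matrix-vector product variants above (a summary of what the
   code does, for orientation):

   - MatMult_MPISBAIJ / MatMultAdd_MPISBAIJ use the combined work vectors
     slvec0/slvec1 and the single scatter sMvctx: the local part of x and the
     product B^T x are packed into slvec0, one forward scatter with ADD_VALUES
     accumulates the off-process contributions into slvec1, and B*slvec1b is
     then added to the diagonal-block product held in slvec1a.

   - The "_2comm" variants use lvec/Mvctx instead: a forward scatter with
     INSERT_VALUES brings in the ghost values of x needed for B*x, and a
     reverse scatter with ADD_VALUES returns the B^T x contributions to the
     processes that own them.
*/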
1036 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1037 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1038 { 1039 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1040 PetscErrorCode ierr; 1041 1042 PetscFunctionBegin; 1043 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1044 /* do diagonal part */ 1045 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1046 /* do supperdiagonal part */ 1047 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1048 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1049 1050 /* do subdiagonal part */ 1051 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1052 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1054 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1060 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1061 { 1062 PetscErrorCode ierr; 1063 1064 PetscFunctionBegin; 1065 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 PetscErrorCode ierr; 1074 1075 PetscFunctionBegin; 1076 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 /* 1081 This only works correctly for square matrices where the subblock A->A is the 1082 diagonal block 1083 */ 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1086 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1087 { 1088 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1089 PetscErrorCode ierr; 1090 1091 PetscFunctionBegin; 1092 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1093 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatScale_MPISBAIJ" 1099 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1100 { 1101 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1106 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1112 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1113 { 1114 PetscFunctionBegin; 1115 SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1121 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1122 { 1123 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1124 1125 PetscFunctionBegin; 1126 if (!baij->getrowactive) { 1127 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1128 } 1129 baij->getrowactive = PETSC_FALSE; 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1135 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1136 { 1137 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1142 ierr = 
MatZeroEntries(l->B);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1148 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1149 { 1150 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1151 Mat A = a->A,B = a->B; 1152 PetscErrorCode ierr; 1153 PetscReal isend[5],irecv[5]; 1154 1155 PetscFunctionBegin; 1156 info->block_size = (PetscReal)matin->bs; 1157 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1158 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1159 isend[3] = info->memory; isend[4] = info->mallocs; 1160 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1162 isend[3] += info->memory; isend[4] += info->mallocs; 1163 if (flag == MAT_LOCAL) { 1164 info->nz_used = isend[0]; 1165 info->nz_allocated = isend[1]; 1166 info->nz_unneeded = isend[2]; 1167 info->memory = isend[3]; 1168 info->mallocs = isend[4]; 1169 } else if (flag == MAT_GLOBAL_MAX) { 1170 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1171 info->nz_used = irecv[0]; 1172 info->nz_allocated = irecv[1]; 1173 info->nz_unneeded = irecv[2]; 1174 info->memory = irecv[3]; 1175 info->mallocs = irecv[4]; 1176 } else if (flag == MAT_GLOBAL_SUM) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } else { 1184 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1185 } 1186 info->rows_global = (PetscReal)A->M; 1187 info->columns_global = (PetscReal)A->N; 1188 info->rows_local = (PetscReal)A->m; 1189 info->columns_local = (PetscReal)A->N; 1190 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1191 info->fill_ratio_needed = 0; 1192 info->factor_mallocs = 0; 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1198 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1199 { 1200 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 switch (op) { 1205 case MAT_NO_NEW_NONZERO_LOCATIONS: 1206 case MAT_YES_NEW_NONZERO_LOCATIONS: 1207 case MAT_COLUMNS_UNSORTED: 1208 case MAT_COLUMNS_SORTED: 1209 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1210 case MAT_KEEP_ZEROED_ROWS: 1211 case MAT_NEW_NONZERO_LOCATION_ERR: 1212 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1213 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1214 break; 1215 case MAT_ROW_ORIENTED: 1216 a->roworiented = PETSC_TRUE; 1217 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1219 break; 1220 case MAT_ROWS_SORTED: 1221 case MAT_ROWS_UNSORTED: 1222 case MAT_YES_NEW_DIAGONALS: 1223 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1224 break; 1225 case MAT_COLUMN_ORIENTED: 1226 a->roworiented = PETSC_FALSE; 1227 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1228 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1229 break; 1230 case MAT_IGNORE_OFF_PROC_ENTRIES: 1231 a->donotstash = PETSC_TRUE; 1232 break; 1233 case MAT_NO_NEW_DIAGONALS: 1234 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1235 case MAT_USE_HASH_TABLE: 1236 a->ht_flag = PETSC_TRUE; 1237 break; 1238 case MAT_NOT_SYMMETRIC: 1239 case 
MAT_NOT_STRUCTURALLY_SYMMETRIC: 1240 case MAT_HERMITIAN: 1241 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1242 case MAT_SYMMETRIC: 1243 case MAT_STRUCTURALLY_SYMMETRIC: 1244 case MAT_NOT_HERMITIAN: 1245 case MAT_SYMMETRY_ETERNAL: 1246 case MAT_NOT_SYMMETRY_ETERNAL: 1247 break; 1248 default: 1249 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1250 } 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1256 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1257 { 1258 PetscErrorCode ierr; 1259 PetscFunctionBegin; 1260 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1266 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1267 { 1268 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1269 Mat a=baij->A, b=baij->B; 1270 PetscErrorCode ierr; 1271 PetscInt nv,m,n; 1272 PetscTruth flg; 1273 1274 PetscFunctionBegin; 1275 if (ll != rr){ 1276 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1277 if (!flg) 1278 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1279 } 1280 if (!ll) PetscFunctionReturn(0); 1281 1282 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1283 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1284 1285 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1286 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1287 1288 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1289 1290 /* left diagonalscale the off-diagonal part */ 1291 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1292 1293 /* scale the diagonal part */ 1294 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1295 1296 /* right diagonalscale the off-diagonal part */ 1297 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1298 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1304 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1305 { 1306 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1307 MPI_Comm comm = A->comm; 1308 static PetscTruth called = PETSC_FALSE; 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 if (!a->rank) { 1313 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1314 } 1315 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1316 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1317 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1323 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1324 { 1325 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 1328 PetscFunctionBegin; 1329 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatEqual_MPISBAIJ" 1337 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1338 { 1339 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1340 Mat a,b,c,d; 
1341 PetscTruth flg; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 a = matA->A; b = matA->B; 1346 c = matB->A; d = matB->B; 1347 1348 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1349 if (flg) { 1350 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1351 } 1352 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1358 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1359 { 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1369 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1370 { 1371 PetscErrorCode ierr; 1372 PetscInt i; 1373 PetscTruth flg; 1374 1375 PetscFunctionBegin; 1376 for (i=0; i<n; i++) { 1377 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1378 if (!flg) { 1379 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1380 } 1381 } 1382 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 1387 /* -------------------------------------------------------------------*/ 1388 static struct _MatOps MatOps_Values = { 1389 MatSetValues_MPISBAIJ, 1390 MatGetRow_MPISBAIJ, 1391 MatRestoreRow_MPISBAIJ, 1392 MatMult_MPISBAIJ, 1393 /* 4*/ MatMultAdd_MPISBAIJ, 1394 MatMultTranspose_MPISBAIJ, 1395 MatMultTransposeAdd_MPISBAIJ, 1396 0, 1397 0, 1398 0, 1399 /*10*/ 0, 1400 0, 1401 0, 1402 MatRelax_MPISBAIJ, 1403 MatTranspose_MPISBAIJ, 1404 /*15*/ MatGetInfo_MPISBAIJ, 1405 MatEqual_MPISBAIJ, 1406 MatGetDiagonal_MPISBAIJ, 1407 MatDiagonalScale_MPISBAIJ, 1408 MatNorm_MPISBAIJ, 1409 /*20*/ MatAssemblyBegin_MPISBAIJ, 1410 MatAssemblyEnd_MPISBAIJ, 1411 0, 1412 MatSetOption_MPISBAIJ, 1413 MatZeroEntries_MPISBAIJ, 1414 /*25*/ 0, 1415 0, 1416 0, 1417 0, 1418 0, 1419 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1420 0, 1421 0, 1422 0, 1423 0, 1424 /*35*/ MatDuplicate_MPISBAIJ, 1425 0, 1426 0, 1427 0, 1428 0, 1429 /*40*/ 0, 1430 MatGetSubMatrices_MPISBAIJ, 1431 MatIncreaseOverlap_MPISBAIJ, 1432 MatGetValues_MPISBAIJ, 1433 0, 1434 /*45*/ MatPrintHelp_MPISBAIJ, 1435 MatScale_MPISBAIJ, 1436 0, 1437 0, 1438 0, 1439 /*50*/ 0, 1440 0, 1441 0, 1442 0, 1443 0, 1444 /*55*/ 0, 1445 0, 1446 MatSetUnfactored_MPISBAIJ, 1447 0, 1448 MatSetValuesBlocked_MPISBAIJ, 1449 /*60*/ 0, 1450 0, 1451 0, 1452 MatGetPetscMaps_Petsc, 1453 0, 1454 /*65*/ 0, 1455 0, 1456 0, 1457 0, 1458 0, 1459 /*70*/ MatGetRowMax_MPISBAIJ, 1460 0, 1461 0, 1462 0, 1463 0, 1464 /*75*/ 0, 1465 0, 1466 0, 1467 0, 1468 0, 1469 /*80*/ 0, 1470 0, 1471 0, 1472 0, 1473 MatLoad_MPISBAIJ, 1474 /*85*/ 0, 1475 0, 1476 0, 1477 0, 1478 0, 1479 /*90*/ 0, 1480 0, 1481 0, 1482 0, 1483 0, 1484 /*95*/ 0, 1485 0, 1486 0, 1487 0}; 1488 1489 1490 EXTERN_C_BEGIN 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1493 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1494 { 1495 PetscFunctionBegin; 1496 *a = ((Mat_MPISBAIJ *)A->data)->A; 1497 *iscopy = PETSC_FALSE; 1498 PetscFunctionReturn(0); 1499 } 1500 EXTERN_C_END 1501 1502 EXTERN_C_BEGIN 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1505 PetscErrorCode PETSCMAT_DLLEXPORT 
MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1506 { 1507 Mat_MPISBAIJ *b; 1508 PetscErrorCode ierr; 1509 PetscInt i,mbs,Mbs; 1510 1511 PetscFunctionBegin; 1512 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1513 1514 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1515 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1516 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1517 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1518 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1519 if (d_nnz) { 1520 for (i=0; i<B->m/bs; i++) { 1521 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1522 } 1523 } 1524 if (o_nnz) { 1525 for (i=0; i<B->m/bs; i++) { 1526 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1527 } 1528 } 1529 B->preallocated = PETSC_TRUE; 1530 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1531 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1532 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1533 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1534 1535 b = (Mat_MPISBAIJ*)B->data; 1536 mbs = B->m/bs; 1537 Mbs = B->M/bs; 1538 if (mbs*bs != B->m) { 1539 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1540 } 1541 1542 B->bs = bs; 1543 b->bs2 = bs*bs; 1544 b->mbs = mbs; 1545 b->nbs = mbs; 1546 b->Mbs = Mbs; 1547 b->Nbs = Mbs; 1548 1549 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1550 b->rowners[0] = 0; 1551 for (i=2; i<=b->size; i++) { 1552 b->rowners[i] += b->rowners[i-1]; 1553 } 1554 b->rstart = b->rowners[b->rank]; 1555 b->rend = b->rowners[b->rank+1]; 1556 b->cstart = b->rstart; 1557 b->cend = b->rend; 1558 for (i=0; i<=b->size; i++) { 1559 b->rowners_bs[i] = b->rowners[i]*bs; 1560 } 1561 b->rstart_bs = b-> rstart*bs; 1562 b->rend_bs = b->rend*bs; 1563 1564 b->cstart_bs = b->cstart*bs; 1565 b->cend_bs = b->cend*bs; 1566 1567 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr); 1568 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1569 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1570 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1571 1572 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr); 1573 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1574 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1575 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1576 1577 /* build cache for off array entries formed */ 1578 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1579 1580 PetscFunctionReturn(0); 1581 } 1582 EXTERN_C_END 1583 1584 /*MC 1585 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1586 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1587 1588 Options Database Keys: 1589 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1590 1591 Level: beginner 1592 1593 .seealso: MatCreateMPISBAIJ 1594 M*/ 1595 1596 EXTERN_C_BEGIN 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatCreate_MPISBAIJ" 1599 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1600 { 1601 Mat_MPISBAIJ *b; 1602 PetscErrorCode ierr; 1603 PetscTruth flg; 1604 1605 PetscFunctionBegin; 1606 1607 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1608 B->data = (void*)b; 1609 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1610 1611 B->ops->destroy = MatDestroy_MPISBAIJ; 1612 B->ops->view = MatView_MPISBAIJ; 1613 B->mapping = 0; 1614 B->factor = 0; 1615 B->assembled = PETSC_FALSE; 1616 1617 B->insertmode = NOT_SET_VALUES; 1618 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1619 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1620 1621 /* build local table of row and column ownerships */ 1622 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1623 b->cowners = b->rowners + b->size + 2; 1624 b->rowners_bs = b->cowners + b->size + 2; 1625 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1626 1627 /* build cache for off array entries formed */ 1628 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1629 b->donotstash = PETSC_FALSE; 1630 b->colmap = PETSC_NULL; 1631 b->garray = PETSC_NULL; 1632 b->roworiented = PETSC_TRUE; 1633 1634 #if defined(PETSC_USE_MAT_SINGLE) 1635 /* stuff for MatSetValues_XXX in single precision */ 1636 b->setvalueslen = 0; 1637 b->setvaluescopy = PETSC_NULL; 1638 #endif 1639 1640 /* stuff used in block assembly */ 1641 b->barray = 0; 1642 1643 /* stuff used for matrix vector multiply */ 1644 b->lvec = 0; 1645 b->Mvctx = 0; 1646 b->slvec0 = 0; 1647 b->slvec0b = 0; 1648 b->slvec1 = 0; 1649 b->slvec1a = 0; 1650 b->slvec1b = 0; 1651 b->sMvctx = 0; 1652 1653 /* stuff for MatGetRow() */ 1654 b->rowindices = 0; 1655 b->rowvalues = 0; 1656 b->getrowactive = PETSC_FALSE; 1657 1658 /* hash table stuff */ 1659 b->ht = 0; 1660 b->hd = 0; 1661 b->ht_size = 0; 1662 b->ht_flag = PETSC_FALSE; 1663 b->ht_fact = 0; 1664 b->ht_total_ct = 0; 1665 b->ht_insert_ct = 0; 1666 1667 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1668 if (flg) { 1669 PetscReal fact = 1.39; 1670 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1671 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1672 if (fact <= 1.0) fact = 1.39; 1673 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1674 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1675 } 1676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1677 "MatStoreValues_MPISBAIJ", 1678 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1680 "MatRetrieveValues_MPISBAIJ", 1681 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1683 "MatGetDiagonalBlock_MPISBAIJ", 1684 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1685 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1686 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1687 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1688 B->symmetric = PETSC_TRUE; 1689 
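/* An SBAIJ matrix stores only the upper triangular block portion, so the matrix is symmetric (and structurally symmetric) by construction; the remaining symmetry flags are likewise set unconditionally below. */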
B->structurally_symmetric = PETSC_TRUE; 1690 B->symmetric_set = PETSC_TRUE; 1691 B->structurally_symmetric_set = PETSC_TRUE; 1692 PetscFunctionReturn(0); 1693 } 1694 EXTERN_C_END 1695 1696 /*MC 1697 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1698 1699 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1700 and MATMPISBAIJ otherwise. 1701 1702 Options Database Keys: 1703 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1704 1705 Level: beginner 1706 1707 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1708 M*/ 1709 1710 EXTERN_C_BEGIN 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatCreate_SBAIJ" 1713 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1714 { 1715 PetscErrorCode ierr; 1716 PetscMPIInt size; 1717 1718 PetscFunctionBegin; 1719 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1720 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1721 if (size == 1) { 1722 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1723 } else { 1724 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1725 } 1726 PetscFunctionReturn(0); 1727 } 1728 EXTERN_C_END 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1732 /*@C 1733 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1734 the user should preallocate the matrix storage by setting the parameters 1735 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1736 performance can be increased by more than a factor of 50. 1737 1738 Collective on Mat 1739 1740 Input Parameters: 1741 + A - the matrix 1742 . bs - size of block 1743 . d_nz - number of block nonzeros per block row in diagonal portion of local 1744 submatrix (same for all local rows) 1745 . d_nnz - array containing the number of block nonzeros in the various block rows 1746 in the upper triangular and diagonal part of the diagonal portion of the local 1747 submatrix (possibly different for each block row) or PETSC_NULL. You must leave room 1748 for the diagonal entry even if it is zero. 1749 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1750 submatrix (same for all local rows). 1751 - o_nnz - array containing the number of nonzeros in the various block rows of the 1752 off-diagonal portion of the local submatrix (possibly different for 1753 each block row) or PETSC_NULL. 1754 1755 1756 Options Database Keys: 1757 . -mat_no_unroll - uses code that does not unroll the loops in the 1758 block calculations (much slower) 1759 . -mat_block_size - size of the blocks to use 1760 1761 Notes: 1762 1763 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1764 then it must be used on all processors that share the object for that argument. 1765 1766 If the *_nnz parameter is given then the *_nz parameter is ignored. 1767 1768 Storage Information: 1769 For a square global matrix we define each processor's diagonal portion 1770 to be its local rows and the corresponding columns (a square submatrix); 1771 each processor's off-diagonal portion encompasses the remainder of the 1772 local matrix (a rectangular submatrix). 1773 1774 The user can specify preallocated storage for the diagonal part of 1775 the local submatrix with either d_nz or d_nnz (not both). Set 1776 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1777 memory allocation.
Likewise, specify preallocated storage for the 1778 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1779 1780 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1781 the figure below we depict these three local rows and all columns (0-11). 1782 1783 .vb 1784 0 1 2 3 4 5 6 7 8 9 10 11 1785 ------------------- 1786 row 3 | o o o d d d o o o o o o 1787 row 4 | o o o d d d o o o o o o 1788 row 5 | o o o d d d o o o o o o 1789 ------------------- 1790 .ve 1791 1792 Thus, any entries in the d locations are stored in the d (diagonal) 1793 submatrix, and any entries in the o locations are stored in the 1794 o (off-diagonal) submatrix. Note that the d matrix is stored in 1795 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 1796 1797 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1798 plus the diagonal part of the d matrix, 1799 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1800 In general, for PDE problems in which most nonzeros are near the diagonal, 1801 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1802 or you will get TERRIBLE performance; see the users' manual chapter on 1803 matrices. 1804 1805 Level: intermediate 1806 1807 .keywords: matrix, block, aij, compressed row, sparse, parallel 1808 1809 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1810 @*/ 1811 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1812 { 1813 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1814 1815 PetscFunctionBegin; 1816 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1817 if (f) { 1818 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1819 } 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatCreateMPISBAIJ" 1825 /*@C 1826 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1827 (block compressed row). For good matrix assembly performance 1828 the user should preallocate the matrix storage by setting the parameters 1829 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1830 performance can be increased by more than a factor of 50. 1831 1832 Collective on MPI_Comm 1833 1834 Input Parameters: 1835 + comm - MPI communicator 1836 . bs - size of block 1837 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1838 This value should be the same as the local size used in creating the 1839 y vector for the matrix-vector product y = Ax. 1840 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1841 This value should be the same as the local size used in creating the 1842 x vector for the matrix-vector product y = Ax. 1843 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1844 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1845 . d_nz - number of block nonzeros per block row in diagonal portion of local 1846 submatrix (same for all local rows) 1847 . d_nnz - array containing the number of block nonzeros in the various block rows 1848 in the upper triangular portion of the diagonal portion of the local 1849 submatrix (possibly different for each block row) or PETSC_NULL.
1850 You must leave room for the diagonal entry even if it is zero. 1851 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1852 submatrix (same for all local rows). 1853 - o_nnz - array containing the number of nonzeros in the various block rows of the 1854 off-diagonal portion of the local submatrix (possibly different for 1855 each block row) or PETSC_NULL. 1856 1857 Output Parameter: 1858 . A - the matrix 1859 1860 Options Database Keys: 1861 . -mat_no_unroll - uses code that does not unroll the loops in the 1862 block calculations (much slower) 1863 . -mat_block_size - size of the blocks to use 1864 . -mat_mpi - use the parallel matrix data structures even on one processor 1865 (defaults to using SeqSBAIJ format on one processor) 1866 1867 Notes: 1868 The number of rows and columns must be divisible by blocksize. 1869 1870 The user MUST specify either the local or global matrix dimensions 1871 (possibly both). 1872 1873 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1874 then it must be used on all processors that share the object for that argument. 1875 1876 If the *_nnz parameter is given then the *_nz parameter is ignored. 1877 1878 Storage Information: 1879 For a square global matrix we define each processor's diagonal portion 1880 to be its local rows and the corresponding columns (a square submatrix); 1881 each processor's off-diagonal portion encompasses the remainder of the 1882 local matrix (a rectangular submatrix). 1883 1884 The user can specify preallocated storage for the diagonal part of 1885 the local submatrix with either d_nz or d_nnz (not both). Set 1886 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1887 memory allocation. Likewise, specify preallocated storage for the 1888 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1889 1890 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1891 the figure below we depict these three local rows and all columns (0-11). 1892 1893 .vb 1894 0 1 2 3 4 5 6 7 8 9 10 11 1895 ------------------- 1896 row 3 | o o o d d d o o o o o o 1897 row 4 | o o o d d d o o o o o o 1898 row 5 | o o o d d d o o o o o o 1899 ------------------- 1900 .ve 1901 1902 Thus, any entries in the d locations are stored in the d (diagonal) 1903 submatrix, and any entries in the o locations are stored in the 1904 o (off-diagonal) submatrix. Note that the d matrix is stored in 1905 MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format. 1906 1907 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1908 plus the diagonal part of the d matrix, 1909 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1910 In general, for PDE problems in which most nonzeros are near the diagonal, 1911 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1912 or you will get TERRIBLE performance; see the users' manual chapter on 1913 matrices.
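   Example:
   The call sequence below is an illustrative sketch only; the communicator, global size,
   block size, preallocation values, and the single inserted entry are made-up values and
   not taken from a PETSc example. It creates a 100x100 matrix with 2x2 blocks, inserts one
   entry, and assembles the matrix. Only entries with col >= row need be set, since only
   the upper triangular portion is stored.
.vb
   Mat            A;
   PetscErrorCode ierr;
   PetscInt       row = 0, col = 2;
   PetscScalar    value = 1.0;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
   ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve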
1914 1915 Level: intermediate 1916 1917 .keywords: matrix, block, aij, compressed row, sparse, parallel 1918 1919 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1920 @*/ 1921 1922 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1923 { 1924 PetscErrorCode ierr; 1925 PetscMPIInt size; 1926 1927 PetscFunctionBegin; 1928 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1929 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1930 if (size > 1) { 1931 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1932 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1933 } else { 1934 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1935 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1936 } 1937 PetscFunctionReturn(0); 1938 } 1939 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1943 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1944 { 1945 Mat mat; 1946 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1947 PetscErrorCode ierr; 1948 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1949 PetscScalar *array; 1950 1951 PetscFunctionBegin; 1952 *newmat = 0; 1953 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1954 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1955 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1956 1957 mat->factor = matin->factor; 1958 mat->preallocated = PETSC_TRUE; 1959 mat->assembled = PETSC_TRUE; 1960 mat->insertmode = NOT_SET_VALUES; 1961 1962 a = (Mat_MPISBAIJ*)mat->data; 1963 mat->bs = matin->bs; 1964 a->bs2 = oldmat->bs2; 1965 a->mbs = oldmat->mbs; 1966 a->nbs = oldmat->nbs; 1967 a->Mbs = oldmat->Mbs; 1968 a->Nbs = oldmat->Nbs; 1969 1970 a->rstart = oldmat->rstart; 1971 a->rend = oldmat->rend; 1972 a->cstart = oldmat->cstart; 1973 a->cend = oldmat->cend; 1974 a->size = oldmat->size; 1975 a->rank = oldmat->rank; 1976 a->donotstash = oldmat->donotstash; 1977 a->roworiented = oldmat->roworiented; 1978 a->rowindices = 0; 1979 a->rowvalues = 0; 1980 a->getrowactive = PETSC_FALSE; 1981 a->barray = 0; 1982 a->rstart_bs = oldmat->rstart_bs; 1983 a->rend_bs = oldmat->rend_bs; 1984 a->cstart_bs = oldmat->cstart_bs; 1985 a->cend_bs = oldmat->cend_bs; 1986 1987 /* hash table stuff */ 1988 a->ht = 0; 1989 a->hd = 0; 1990 a->ht_size = 0; 1991 a->ht_flag = oldmat->ht_flag; 1992 a->ht_fact = oldmat->ht_fact; 1993 a->ht_total_ct = 0; 1994 a->ht_insert_ct = 0; 1995 1996 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1997 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1998 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1999 if (oldmat->colmap) { 2000 #if defined (PETSC_USE_CTABLE) 2001 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2002 #else 2003 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2004 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2005 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2006 #endif 2007 } else a->colmap = 0; 2008 2009 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2010 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2011 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2012 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2013 } else a->garray = 0; 2014 2015 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2016 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2017 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2018 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2019 2020 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2021 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2022 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2023 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2024 2025 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2026 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2027 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2028 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2029 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2030 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2031 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2032 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2033 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2034 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2035 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2036 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2037 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2038 2039 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2040 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2041 a->sMvctx = oldmat->sMvctx; 2042 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2043 2044 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2045 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2046 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2047 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2048 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2049 *newmat = mat; 2050 PetscFunctionReturn(0); 2051 } 2052 2053 #include "petscsys.h" 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatLoad_MPISBAIJ" 2057 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2058 { 2059 Mat A; 2060 PetscErrorCode ierr; 2061 PetscInt i,nz,j,rstart,rend; 2062 PetscScalar *vals,*buf; 2063 MPI_Comm comm = ((PetscObject)viewer)->comm; 2064 MPI_Status status; 2065 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners; 2066 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2067 PetscInt *locrowlens,*procsnz = 0,jj,*mycols,*ibuf; 2068 PetscInt bs=1,Mbs,mbs,extra_rows; 2069 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2070 PetscInt dcount,kmax,k,nzcount,tmp; 2071 int fd; 2072 2073 PetscFunctionBegin; 2074 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2075 2076 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2077 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2078 if (!rank) { 2079 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2080 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2081 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2082 if (header[3] < 0) { 2083 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2084 } 2085 } 2086 2087 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2088 M = header[1]; N = header[2]; 2089 2090 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2091 2092 /* 2093 This code adds extra rows to make sure the number of rows is 2094 divisible by the blocksize 2095 */ 2096 Mbs = M/bs; 2097 extra_rows = bs - M + bs*(Mbs); 2098 if (extra_rows == bs) extra_rows = 0; 2099 else Mbs++; 2100 if (extra_rows &&!rank) { 2101 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2102 } 2103 2104 /* determine ownership of all rows */ 2105 mbs = Mbs/size + ((Mbs % size) > rank); 2106 m = mbs*bs; 2107 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2108 browners = rowners + size + 1; 2109 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2110 rowners[0] = 0; 2111 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2112 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2113 rstart = rowners[rank]; 2114 rend = rowners[rank+1]; 2115 2116 /* distribute row lengths to all processors */ 2117 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2118 if (!rank) { 2119 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2120 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2121 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2122 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2123 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2124 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2125 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2126 } else { 2127 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2128 } 2129 2130 if (!rank) { /* procs[0] */ 2131 /* calculate the number of nonzeros on each processor */ 2132 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2133 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2134 for (i=0; i<size; i++) { 2135 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2136 procsnz[i] += rowlengths[j]; 2137 } 2138 } 2139 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2140 2141 /* determine max buffer needed and allocate it */ 2142 maxnz = 0; 2143 for (i=0; i<size; i++) { 2144 maxnz = PetscMax(maxnz,procsnz[i]); 2145 } 2146 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2147 2148 /* read in my part of the matrix column indices */ 2149 nz = procsnz[0]; 2150 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2151 mycols = ibuf; 2152 if (size == 1) nz -= extra_rows; 2153 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2154 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2155 2156 /* read in every ones (except the last) and ship off */ 2157 for (i=1; i<size-1; i++) { 2158 nz = procsnz[i]; 2159 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2160 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2161 } 2162 /* read in the stuff for the last proc */ 2163 if (size != 1) { 2164 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2165 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2166 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2167 ierr = 
MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2168 } 2169 ierr = PetscFree(cols);CHKERRQ(ierr); 2170 } else { /* procs[i], i>0 */ 2171 /* determine buffer space needed for message */ 2172 nz = 0; 2173 for (i=0; i<m; i++) { 2174 nz += locrowlens[i]; 2175 } 2176 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2177 mycols = ibuf; 2178 /* receive message of column indices*/ 2179 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2180 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2181 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2182 } 2183 2184 /* loop over local rows, determining number of off diagonal entries */ 2185 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2186 odlens = dlens + (rend-rstart); 2187 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2188 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2189 masked1 = mask + Mbs; 2190 masked2 = masked1 + Mbs; 2191 rowcount = 0; nzcount = 0; 2192 for (i=0; i<mbs; i++) { 2193 dcount = 0; 2194 odcount = 0; 2195 for (j=0; j<bs; j++) { 2196 kmax = locrowlens[rowcount]; 2197 for (k=0; k<kmax; k++) { 2198 tmp = mycols[nzcount++]/bs; /* block col. index */ 2199 if (!mask[tmp]) { 2200 mask[tmp] = 1; 2201 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2202 else masked1[dcount++] = tmp; /* entry in diag portion */ 2203 } 2204 } 2205 rowcount++; 2206 } 2207 2208 dlens[i] = dcount; /* d_nzz[i] */ 2209 odlens[i] = odcount; /* o_nzz[i] */ 2210 2211 /* zero out the mask elements we set */ 2212 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2213 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2214 } 2215 2216 /* create our matrix */ 2217 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2218 ierr = MatSetType(A,type);CHKERRQ(ierr); 2219 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2220 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2221 2222 if (!rank) { 2223 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2224 /* read in my part of the matrix numerical values */ 2225 nz = procsnz[0]; 2226 vals = buf; 2227 mycols = ibuf; 2228 if (size == 1) nz -= extra_rows; 2229 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2230 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2231 2232 /* insert into matrix */ 2233 jj = rstart*bs; 2234 for (i=0; i<m; i++) { 2235 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2236 mycols += locrowlens[i]; 2237 vals += locrowlens[i]; 2238 jj++; 2239 } 2240 2241 /* read in other processors (except the last one) and ship out */ 2242 for (i=1; i<size-1; i++) { 2243 nz = procsnz[i]; 2244 vals = buf; 2245 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2246 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2247 } 2248 /* the last proc */ 2249 if (size != 1){ 2250 nz = procsnz[i] - extra_rows; 2251 vals = buf; 2252 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2253 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2254 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2255 } 2256 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2257 2258 } else { 2259 /* receive numeric values */ 2260 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2261 2262 /* receive message of values*/ 2263 vals = buf; 2264 mycols = 
ibuf; 2265 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2266 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2267 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2268 2269 /* insert into matrix */ 2270 jj = rstart*bs; 2271 for (i=0; i<m; i++) { 2272 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2273 mycols += locrowlens[i]; 2274 vals += locrowlens[i]; 2275 jj++; 2276 } 2277 } 2278 2279 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2280 ierr = PetscFree(buf);CHKERRQ(ierr); 2281 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2282 ierr = PetscFree(rowners);CHKERRQ(ierr); 2283 ierr = PetscFree(dlens);CHKERRQ(ierr); 2284 ierr = PetscFree(mask);CHKERRQ(ierr); 2285 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2286 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2287 *newmat = A; 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2293 /*XXXXX@ 2294 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2295 2296 Input Parameters: 2297 . mat - the matrix 2298 . fact - factor 2299 2300 Collective on Mat 2301 2302 Level: advanced 2303 2304 Notes: 2305 This can also be set by the command line option: -mat_use_hash_table fact 2306 2307 .keywords: matrix, hashtable, factor, HT 2308 2309 .seealso: MatSetOption() 2310 @XXXXX*/ 2311 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2315 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2316 { 2317 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2318 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2319 PetscReal atmp; 2320 PetscReal *work,*svalues,*rvalues; 2321 PetscErrorCode ierr; 2322 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2323 PetscMPIInt rank,size; 2324 PetscInt *rowners_bs,dest,count,source; 2325 PetscScalar *va; 2326 MatScalar *ba; 2327 MPI_Status stat; 2328 2329 PetscFunctionBegin; 2330 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2331 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2332 2333 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2334 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2335 2336 bs = A->bs; 2337 mbs = a->mbs; 2338 Mbs = a->Mbs; 2339 ba = b->a; 2340 bi = b->i; 2341 bj = b->j; 2342 2343 /* find ownerships */ 2344 rowners_bs = a->rowners_bs; 2345 2346 /* each proc creates an array to be distributed */ 2347 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2348 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2349 2350 /* row_max for B */ 2351 if (rank != size-1){ 2352 for (i=0; i<mbs; i++) { 2353 ncols = bi[1] - bi[0]; bi++; 2354 brow = bs*i; 2355 for (j=0; j<ncols; j++){ 2356 bcol = bs*(*bj); 2357 for (kcol=0; kcol<bs; kcol++){ 2358 col = bcol + kcol; /* local col index */ 2359 col += rowners_bs[rank+1]; /* global col index */ 2360 for (krow=0; krow<bs; krow++){ 2361 atmp = PetscAbsScalar(*ba); ba++; 2362 row = brow + krow; /* local row index */ 2363 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2364 if (work[col] < atmp) work[col] = atmp; 2365 } 2366 } 2367 bj++; 2368 } 2369 } 2370 2371 /* send values to its owners */ 2372 for (dest=rank+1; dest<size; dest++){ 2373 svalues = work + rowners_bs[dest]; 2374 count = rowners_bs[dest+1]-rowners_bs[dest]; 2375 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2376 } 2377 } 2378 2379 /* receive values */ 2380 if (rank){ 2381 rvalues = 
work; 2382 count = rowners_bs[rank+1]-rowners_bs[rank]; 2383 for (source=0; source<rank; source++){ 2384 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2385 /* process values */ 2386 for (i=0; i<count; i++){ 2387 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2388 } 2389 } 2390 } 2391 2392 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2393 ierr = PetscFree(work);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatRelax_MPISBAIJ" 2399 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2400 { 2401 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2402 PetscErrorCode ierr; 2403 PetscInt mbs=mat->mbs,bs=matin->bs; 2404 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2405 Vec bb1; 2406 2407 PetscFunctionBegin; 2408 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2409 if (bs > 1) 2410 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2411 2412 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2413 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2414 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2415 its--; 2416 } 2417 2418 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2419 while (its--){ 2420 2421 /* lower triangular part: slvec0b = - B^T*xx */ 2422 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2423 2424 /* copy xx into slvec0a */ 2425 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2426 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2427 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2428 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2429 2430 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2431 2432 /* copy bb into slvec1a */ 2433 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2434 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2435 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2436 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2437 2438 /* set slvec1b = 0 */ 2439 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2440 2441 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2443 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2444 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2445 2446 /* upper triangular part: bb1 = bb1 - B*x */ 2447 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2448 2449 /* local diagonal sweep */ 2450 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2451 } 2452 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2453 } else { 2454 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2455 } 2456 PetscFunctionReturn(0); 2457 } 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2461 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2462 { 2463 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2464 PetscErrorCode ierr; 2465 PetscScalar mone=-1.0; 2466 Vec lvec1,bb1; 2467 2468 PetscFunctionBegin; 2469 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D 
and local its %D both positive",its,lits); 2470 if (matin->bs > 1) 2471 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2472 2473 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2474 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2475 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2476 its--; 2477 } 2478 2479 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2480 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2481 while (its--){ 2482 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2483 2484 /* lower diagonal part: bb1 = bb - B^T*xx */ 2485 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2486 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2487 2488 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2489 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2490 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2491 2492 /* upper diagonal part: bb1 = bb1 - B*x */ 2493 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2494 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2495 2496 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2497 2498 /* diagonal sweep */ 2499 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2500 } 2501 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2502 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2503 } else { 2504 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2505 } 2506 PetscFunctionReturn(0); 2507 } 2508 2509
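/*
   Illustrative usage sketch (not part of the library source): how a caller would build an
   MPISBAIJ matrix with per-block-row preallocation using the routines defined above. The
   local size, block size, and the d_nnz/o_nnz values are invented for a hypothetical
   process owning 4 block rows of size 2; d_nnz counts only upper-triangular block nonzeros
   (including the diagonal block), o_nnz counts block nonzeros in the off-diagonal portion.

      Mat            A;
      PetscErrorCode ierr;
      PetscInt       d_nnz[4] = {3,2,2,1};
      PetscInt       o_nnz[4] = {1,1,0,0};

      ierr = MatCreate(PETSC_COMM_WORLD,8,8,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      ... insert upper triangular entries with MatSetValues() or MatSetValuesBlocked() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   When the d_nnz/o_nnz arrays are passed, the scalar d_nz/o_nz arguments are ignored
   (here they are given as 0).
*/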