1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\ 112 \ 113 /* copy over old data into new slots */ \ 114 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 115 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 116 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 117 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 118 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 119 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 120 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 121 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 122 /* free up old matrix storage */ \ 123 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);\ 124 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 125 a->singlemalloc = PETSC_TRUE; \ 126 \ 127 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 128 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 129 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 130 a->maxnz += bs2*CHUNKSIZE; \ 131 a->reallocs++; \ 132 a->nz++; \ 133 } \ 134 N = nrow++ - 1; \ 135 /* shift up all the later entries in this row */ \ 136 for (ii=N; ii>=_i; ii--) { \ 137 rp[ii+1] = rp[ii]; \ 138 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 139 } \ 140 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 141 rp[_i] = bcol; \ 142 ap[bs2*_i + bs*cidx + ridx] = value; \ 143 a_noinsert:; \ 144 ailen[brow] = nrow; \ 145 } 146 #ifndef MatSetValues_SeqBAIJ_B_Private 147 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 148 { \ 149 brow = row/bs; \ 150 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 151 rmax = bimax[brow]; nrow = bilen[brow]; \ 152 bcol = col/bs; \ 153 ridx = row % bs; cidx = col % bs; \ 154 low = 0; high = nrow; \ 155 while (high-low > 3) { \ 156 t = (low+high)/2; \ 157 if (rp[t] > bcol) high = t; \ 158 else low = t; \ 159 } \ 160 for (_i=low; _i<high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2*_i + bs*cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto b_noinsert; \ 167 } \ 168 } \ 169 if (b->nonew == 1) goto b_noinsert; \ 170 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 171 if (nrow >= rmax) { \ 172 /* there is no extra room in row, therefore enlarge */ \ 173 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 174 MatScalar *new_a; \ 175 \ 176 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 177 \ 178 /* malloc new storage space */ \ 179 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,b->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\ 180 \ 181 /* copy over old data into new slots */ \ 182 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 183 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 184 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 185 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 186 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 187 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 188 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 189 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE),ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 190 /* free up old matrix storage */ \ 191 ierr = MatSeqXAIJFreeAIJ(b->singlemalloc,b->a,b->j,b->i);CHKERRQ(ierr);\ 192 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 193 b->singlemalloc = PETSC_TRUE; \ 194 \ 195 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 196 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 197 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 198 b->maxnz += bs2*CHUNKSIZE; \ 199 b->reallocs++; \ 200 b->nz++; \ 201 } \ 202 N = nrow++ - 1; \ 203 /* shift up all the later entries in this row */ \ 204 for (ii=N; ii>=_i; ii--) { \ 205 rp[ii+1] = rp[ii]; \ 206 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 207 } \ 208 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 209 rp[_i] = bcol; \ 210 ap[bs2*_i + bs*cidx + ridx] = value; \ 211 b_noinsert:; \ 212 bilen[brow] = nrow; \ 213 } 214 #endif 215 216 #if defined(PETSC_USE_MAT_SINGLE) 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatSetValues_MPISBAIJ" 219 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 220 { 221 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 222 PetscErrorCode ierr; 223 PetscInt i,N = m*n; 224 MatScalar *vsingle; 225 226 PetscFunctionBegin; 227 if (N > b->setvalueslen) { 228 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 229 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 230 b->setvalueslen = N; 231 } 232 vsingle = b->setvaluescopy; 233 234 for (i=0; i<N; i++) { 235 vsingle[i] = v[i]; 236 } 237 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 243 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 244 { 245 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 246 PetscErrorCode ierr; 247 PetscInt i,N = m*n*b->bs2; 248 MatScalar *vsingle; 249 250 PetscFunctionBegin; 251 if (N > b->setvalueslen) { 252 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 253 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 254 b->setvalueslen = N; 255 } 256 vsingle = b->setvaluescopy; 257 for (i=0; i<N; i++) { 258 vsingle[i] = v[i]; 259 } 260 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 #endif 264 265 /* Only add/insert a(i,j) with i<=j (blocks). 266 Any a(i,j) with i>j input by user is ingored. 267 */ 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 270 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 271 { 272 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 273 MatScalar value; 274 PetscTruth roworiented = baij->roworiented; 275 PetscErrorCode ierr; 276 PetscInt i,j,row,col; 277 PetscInt rstart_orig=baij->rstart_bs; 278 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 279 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 280 281 /* Some Variables required in the macro */ 282 Mat A = baij->A; 283 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 284 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 285 MatScalar *aa=a->a; 286 287 Mat B = baij->B; 288 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 289 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 290 MatScalar *ba=b->a; 291 292 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 293 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 294 MatScalar *ap,*bap; 295 296 /* for stash */ 297 PetscInt n_loc, *in_loc=0; 298 MatScalar *v_loc=0; 299 300 PetscFunctionBegin; 301 302 if(!baij->donotstash){ 303 ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr); 304 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 305 } 306 307 for (i=0; i<m; i++) { 308 if (im[i] < 0) continue; 309 #if defined(PETSC_USE_DEBUG) 310 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 311 #endif 312 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 313 row = im[i] - rstart_orig; /* local row index */ 314 for (j=0; j<n; j++) { 315 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 316 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 317 col = in[j] - cstart_orig; /* local col index */ 318 brow = row/bs; bcol = col/bs; 319 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 320 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 321 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 322 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 323 } else if (in[j] < 0) continue; 324 #if defined(PETSC_USE_DEBUG) 325 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 326 #endif 327 else { /* off-diag entry (B) */ 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 334 col = col - 1; 335 #else 336 col = baij->colmap[in[j]/bs] - 1; 337 #endif 338 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 339 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 342 B = baij->B; 343 b = (Mat_SeqBAIJ*)(B)->data; 344 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 345 ba=b->a; 346 } else col += in[j]%bs; 347 } else col = in[j]; 348 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 349 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 350 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 351 } 352 } 353 } else { /* off processor entry */ 354 if (!baij->donotstash) { 355 n_loc = 0; 356 for (j=0; j<n; j++){ 357 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 358 in_loc[n_loc] = in[j]; 359 if (roworiented) { 360 v_loc[n_loc] = v[i*n+j]; 361 } else { 362 v_loc[n_loc] = v[j*m+i]; 363 } 364 n_loc++; 365 } 366 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 367 } 368 } 369 } 370 371 if(!baij->donotstash){ 372 ierr = PetscFree(in_loc);CHKERRQ(ierr); 373 ierr = PetscFree(v_loc);CHKERRQ(ierr); 374 } 375 PetscFunctionReturn(0); 376 } 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 380 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 381 { 382 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 383 const MatScalar *value; 384 MatScalar *barray=baij->barray; 385 PetscTruth roworiented = baij->roworiented; 386 PetscErrorCode ierr; 387 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 388 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 389 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 390 391 PetscFunctionBegin; 392 if(!barray) { 393 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 394 baij->barray = barray; 395 } 396 397 if (roworiented) { 398 stepval = (n-1)*bs; 399 } else { 400 stepval = (m-1)*bs; 401 } 402 for (i=0; i<m; i++) { 403 if (im[i] < 0) continue; 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 406 #endif 407 if (im[i] >= rstart && im[i] < rend) { 408 row = im[i] - rstart; 409 for (j=0; j<n; j++) { 410 /* If NumCol = 1 then a copy is not required */ 411 if ((roworiented) && (n == 1)) { 412 barray = (MatScalar*) v + i*bs2; 413 } else if((!roworiented) && (m == 1)) { 414 barray = (MatScalar*) v + j*bs2; 415 } else { /* Here a copy is required */ 416 if (roworiented) { 417 value = v + i*(stepval+bs)*bs + j*bs; 418 } else { 419 value = v + j*(stepval+bs)*bs + i*bs; 420 } 421 for (ii=0; ii<bs; ii++,value+=stepval) { 422 for (jj=0; jj<bs; jj++) { 423 *barray++ = *value++; 424 } 425 } 426 barray -=bs2; 427 } 428 429 if (in[j] >= cstart && in[j] < cend){ 430 col = in[j] - cstart; 431 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 432 } 433 else if (in[j] < 0) continue; 434 #if defined(PETSC_USE_DEBUG) 435 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 436 #endif 437 else { 438 if (mat->was_assembled) { 439 if (!baij->colmap) { 440 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 441 } 442 443 #if defined(PETSC_USE_DEBUG) 444 #if defined (PETSC_USE_CTABLE) 445 { PetscInt data; 446 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 447 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 448 } 449 #else 450 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 451 #endif 452 #endif 453 #if defined (PETSC_USE_CTABLE) 454 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 455 col = (col - 1)/bs; 456 #else 457 col = (baij->colmap[in[j]] - 1)/bs; 458 #endif 459 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 460 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 461 col = in[j]; 462 } 463 } 464 else col = in[j]; 465 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 466 } 467 } 468 } else { 469 if (!baij->donotstash) { 470 if (roworiented) { 471 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 472 } else { 473 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "MatGetValues_MPISBAIJ" 483 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 484 { 485 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 486 PetscErrorCode ierr; 487 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 488 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 489 490 PetscFunctionBegin; 491 for (i=0; i<m; i++) { 492 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 493 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 494 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 495 row = idxm[i] - bsrstart; 496 for (j=0; j<n; j++) { 497 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); 498 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 499 if (idxn[j] >= bscstart && idxn[j] < bscend){ 500 col = idxn[j] - bscstart; 501 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 502 } else { 503 if (!baij->colmap) { 504 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 505 } 506 #if defined (PETSC_USE_CTABLE) 507 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 508 data --; 509 #else 510 data = baij->colmap[idxn[j]/bs]-1; 511 #endif 512 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 513 else { 514 col = data + idxn[j]%bs; 515 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 516 } 517 } 518 } 519 } else { 520 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 521 } 522 } 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatNorm_MPISBAIJ" 528 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 529 { 530 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 531 PetscErrorCode ierr; 532 PetscReal sum[2],*lnorm2; 533 534 PetscFunctionBegin; 535 if (baij->size == 1) { 536 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 537 } else { 538 if (type == NORM_FROBENIUS) { 539 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 540 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 541 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 542 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 543 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 544 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 545 *norm = sqrt(sum[0] + 2*sum[1]); 546 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 547 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 548 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 549 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 550 PetscReal *rsum,*rsum2,vabs; 551 PetscInt *jj,*garray=baij->garray,rstart=baij->rstart,nz; 552 PetscInt brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs; 553 MatScalar *v; 554 555 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr); 556 rsum2 = rsum + mat->N; 557 ierr = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 558 /* Amat */ 559 v = amat->a; jj = amat->j; 560 for (brow=0; brow<mbs; brow++) { 561 grow = bs*(rstart + brow); 562 nz = amat->i[brow+1] - amat->i[brow]; 563 for (bcol=0; bcol<nz; bcol++){ 564 gcol = bs*(rstart + *jj); jj++; 565 for (col=0; col<bs; col++){ 566 for (row=0; row<bs; row++){ 567 vabs = PetscAbsScalar(*v); v++; 568 rsum[gcol+col] += vabs; 569 /* non-diagonal block */ 570 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 571 } 572 } 573 } 574 } 575 /* Bmat */ 576 v = bmat->a; jj = bmat->j; 577 for (brow=0; brow<mbs; brow++) { 578 grow = bs*(rstart + brow); 579 nz = bmat->i[brow+1] - bmat->i[brow]; 580 for (bcol=0; bcol<nz; bcol++){ 581 gcol = bs*garray[*jj]; jj++; 582 for (col=0; col<bs; col++){ 583 for (row=0; row<bs; row++){ 584 vabs = PetscAbsScalar(*v); v++; 585 rsum[gcol+col] += vabs; 586 rsum[grow+row] += vabs; 587 } 588 } 589 } 590 } 591 ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 592 *norm = 0.0; 593 for (col=0; col<mat->N; col++) { 594 if (rsum2[col] > *norm) *norm = rsum2[col]; 595 } 596 ierr = PetscFree(rsum);CHKERRQ(ierr); 597 } else { 598 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 606 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 607 { 608 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 609 PetscErrorCode ierr; 610 PetscInt nstash,reallocs; 611 InsertMode addv; 612 613 PetscFunctionBegin; 614 if (baij->donotstash) { 615 PetscFunctionReturn(0); 616 } 617 618 /* make sure all processors are either in INSERTMODE or ADDMODE */ 619 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 620 if (addv == (ADD_VALUES|INSERT_VALUES)) { 621 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 622 } 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 626 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 627 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 628 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 629 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 630 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 636 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 639 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 640 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 641 PetscErrorCode ierr; 642 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 643 PetscInt *row,*col,other_disassembled; 644 PetscMPIInt n; 645 PetscTruth r1,r2,r3; 646 MatScalar *val; 647 InsertMode addv = mat->insertmode; 648 649 PetscFunctionBegin; 650 651 if (!baij->donotstash) { 652 while (1) { 653 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 654 if (!flg) break; 655 656 for (i=0; i<n;) { 657 /* Now identify the consecutive vals belonging to the same row */ 658 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 659 if (j < n) ncols = j-i; 660 else ncols = n-i; 661 /* Now assemble all these values with a single function call */ 662 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 663 i = j; 664 } 665 } 666 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 667 /* Now process the block-stash. Since the values are stashed column-oriented, 668 set the roworiented flag to column oriented, and after MatSetValues() 669 restore the original flags */ 670 r1 = baij->roworiented; 671 r2 = a->roworiented; 672 r3 = b->roworiented; 673 baij->roworiented = PETSC_FALSE; 674 a->roworiented = PETSC_FALSE; 675 b->roworiented = PETSC_FALSE; 676 while (1) { 677 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 678 if (!flg) break; 679 680 for (i=0; i<n;) { 681 /* Now identify the consecutive vals belonging to the same row */ 682 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 683 if (j < n) ncols = j-i; 684 else ncols = n-i; 685 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 686 i = j; 687 } 688 } 689 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 690 baij->roworiented = r1; 691 a->roworiented = r2; 692 b->roworiented = r3; 693 } 694 695 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 696 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 697 698 /* determine if any processor has disassembled, if so we must 699 also disassemble ourselfs, in order that we may reassemble. */ 700 /* 701 if nonzero structure of submatrix B cannot change then we know that 702 no processor disassembled thus we can skip this stuff 703 */ 704 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 705 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 706 if (mat->was_assembled && !other_disassembled) { 707 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 708 } 709 } 710 711 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 712 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 713 } 714 b->compressedrow.use = PETSC_TRUE; 715 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 716 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 717 718 if (baij->rowvalues) { 719 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 720 baij->rowvalues = 0; 721 } 722 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 728 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 729 { 730 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 731 PetscErrorCode ierr; 732 PetscInt bs = mat->bs; 733 PetscMPIInt size = baij->size,rank = baij->rank; 734 PetscTruth iascii,isdraw; 735 PetscViewer sviewer; 736 PetscViewerFormat format; 737 738 PetscFunctionBegin; 739 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 740 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 741 if (iascii) { 742 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 743 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 744 MatInfo info; 745 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 746 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 748 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 749 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 750 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 751 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 752 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 753 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 754 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 755 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } else if (format == PETSC_VIEWER_ASCII_INFO) { 758 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 } 762 763 if (isdraw) { 764 PetscDraw draw; 765 PetscTruth isnull; 766 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 767 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 768 } 769 770 if (size == 1) { 771 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 772 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 773 } else { 774 /* assemble the entire matrix onto first processor. */ 775 Mat A; 776 Mat_SeqSBAIJ *Aloc; 777 Mat_SeqBAIJ *Bloc; 778 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 779 MatScalar *a; 780 781 /* Should this be the same type as mat? */ 782 if (!rank) { 783 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 784 } else { 785 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 786 } 787 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 788 ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 790 791 /* copy over the A part */ 792 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 793 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 794 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 795 796 for (i=0; i<mbs; i++) { 797 rvals[0] = bs*(baij->rstart + i); 798 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 799 for (j=ai[i]; j<ai[i+1]; j++) { 800 col = (baij->cstart+aj[j])*bs; 801 for (k=0; k<bs; k++) { 802 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 803 col++; a += bs; 804 } 805 } 806 } 807 /* copy over the B part */ 808 Bloc = (Mat_SeqBAIJ*)baij->B->data; 809 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 810 for (i=0; i<mbs; i++) { 811 rvals[0] = bs*(baij->rstart + i); 812 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 813 for (j=ai[i]; j<ai[i+1]; j++) { 814 col = baij->garray[aj[j]]*bs; 815 for (k=0; k<bs; k++) { 816 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 817 col++; a += bs; 818 } 819 } 820 } 821 ierr = PetscFree(rvals);CHKERRQ(ierr); 822 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 /* 825 Everyone has to call to draw the matrix since the graphics waits are 826 synchronized across all processors that share the PetscDraw object 827 */ 828 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 829 if (!rank) { 830 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 831 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 832 } 833 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 834 ierr = MatDestroy(A);CHKERRQ(ierr); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "MatView_MPISBAIJ" 841 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 842 { 843 PetscErrorCode ierr; 844 PetscTruth iascii,isdraw,issocket,isbinary; 845 846 PetscFunctionBegin; 847 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 848 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 849 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 850 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 851 if (iascii || isdraw || issocket || isbinary) { 852 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 853 } else { 854 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 855 } 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatDestroy_MPISBAIJ" 861 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 862 { 863 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 864 PetscErrorCode ierr; 865 866 PetscFunctionBegin; 867 #if defined(PETSC_USE_LOG) 868 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 869 #endif 870 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 871 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 872 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 873 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 874 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 875 #if defined (PETSC_USE_CTABLE) 876 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 877 #else 878 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 879 #endif 880 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 881 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 882 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 883 if (baij->slvec0) { 884 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 885 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 886 } 887 if (baij->slvec1) { 888 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 889 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 890 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 891 } 892 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 893 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 894 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 895 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 896 #if defined(PETSC_USE_MAT_SINGLE) 897 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 898 #endif 899 ierr = PetscFree(baij);CHKERRQ(ierr); 900 901 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 904 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatMult_MPISBAIJ" 910 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 911 { 912 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 913 PetscErrorCode ierr; 914 PetscInt nt,mbs=a->mbs,bs=A->bs; 915 PetscScalar *x,*from,zero=0.0; 916 917 PetscFunctionBegin; 918 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 919 if (nt != A->n) { 920 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 921 } 922 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 923 if (nt != A->m) { 924 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 925 } 926 927 /* diagonal part */ 928 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 929 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 930 931 /* subdiagonal part */ 932 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 933 934 /* copy x into the vec slvec0 */ 935 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 936 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 937 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 938 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 939 940 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 941 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 942 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 943 944 /* supperdiagonal part */ 945 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 946 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 952 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 953 { 954 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 955 PetscErrorCode ierr; 956 PetscInt nt; 957 958 PetscFunctionBegin; 959 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 960 if (nt != A->n) { 961 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 962 } 963 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 964 if (nt != A->m) { 965 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 966 } 967 968 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 969 /* do diagonal part */ 970 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 971 /* do supperdiagonal part */ 972 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 973 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 974 /* do subdiagonal part */ 975 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 976 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 977 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 978 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 984 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 985 { 986 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 987 PetscErrorCode ierr; 988 PetscInt mbs=a->mbs,bs=A->bs; 989 PetscScalar *x,*from,zero=0.0; 990 991 PetscFunctionBegin; 992 /* 993 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 994 PetscSynchronizedFlush(A->comm); 995 */ 996 /* diagonal part */ 997 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 998 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 999 1000 /* subdiagonal part */ 1001 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1002 1003 /* copy x into the vec slvec0 */ 1004 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1005 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1006 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1007 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1008 1009 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1010 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1011 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1012 1013 /* supperdiagonal part */ 1014 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1015 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1021 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1022 { 1023 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1024 PetscErrorCode ierr; 1025 1026 PetscFunctionBegin; 1027 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1028 /* do diagonal part */ 1029 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1030 /* do supperdiagonal part */ 1031 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1032 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1033 1034 /* do subdiagonal part */ 1035 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1036 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1038 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1044 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1045 { 1046 PetscErrorCode ierr; 1047 1048 PetscFunctionBegin; 1049 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1055 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1056 { 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 /* 1065 This only works correctly for square matrices where the subblock A->A is the 1066 diagonal block 1067 */ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1070 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1071 { 1072 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1073 PetscErrorCode ierr; 1074 1075 PetscFunctionBegin; 1076 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1077 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatScale_MPISBAIJ" 1083 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1084 { 1085 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1090 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1096 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1097 { 1098 PetscFunctionBegin; 1099 SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1105 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1106 { 1107 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1108 1109 PetscFunctionBegin; 1110 if (!baij->getrowactive) { 1111 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1112 } 1113 baij->getrowactive = PETSC_FALSE; 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1119 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1120 { 1121 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1126 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1132 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1133 { 1134 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1135 Mat A = a->A,B = a->B; 1136 PetscErrorCode ierr; 1137 PetscReal isend[5],irecv[5]; 1138 1139 PetscFunctionBegin; 1140 info->block_size = (PetscReal)matin->bs; 1141 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1142 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1143 isend[3] = info->memory; isend[4] = info->mallocs; 1144 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1145 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1146 isend[3] += info->memory; isend[4] += info->mallocs; 1147 if (flag == MAT_LOCAL) { 1148 info->nz_used = isend[0]; 1149 info->nz_allocated = isend[1]; 1150 info->nz_unneeded = isend[2]; 1151 info->memory = isend[3]; 1152 info->mallocs = isend[4]; 1153 } else if (flag == MAT_GLOBAL_MAX) { 1154 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1155 info->nz_used = irecv[0]; 1156 info->nz_allocated = irecv[1]; 1157 info->nz_unneeded = irecv[2]; 1158 info->memory = irecv[3]; 1159 info->mallocs = irecv[4]; 1160 } else if (flag == MAT_GLOBAL_SUM) { 1161 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1162 info->nz_used = irecv[0]; 1163 info->nz_allocated = irecv[1]; 1164 info->nz_unneeded = irecv[2]; 1165 info->memory = irecv[3]; 1166 info->mallocs = irecv[4]; 1167 } else { 1168 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1169 } 1170 info->rows_global = (PetscReal)A->M; 1171 info->columns_global = (PetscReal)A->N; 1172 info->rows_local = (PetscReal)A->m; 1173 info->columns_local = (PetscReal)A->N; 1174 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1175 info->fill_ratio_needed = 0; 1176 info->factor_mallocs = 0; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1182 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1183 { 1184 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1185 PetscErrorCode ierr; 1186 1187 PetscFunctionBegin; 1188 switch (op) { 1189 case MAT_NO_NEW_NONZERO_LOCATIONS: 1190 case MAT_YES_NEW_NONZERO_LOCATIONS: 1191 case MAT_COLUMNS_UNSORTED: 1192 case MAT_COLUMNS_SORTED: 1193 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1194 case MAT_KEEP_ZEROED_ROWS: 1195 case MAT_NEW_NONZERO_LOCATION_ERR: 1196 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1197 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1198 break; 1199 case MAT_ROW_ORIENTED: 1200 a->roworiented = PETSC_TRUE; 1201 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1202 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1203 break; 1204 case MAT_ROWS_SORTED: 1205 case MAT_ROWS_UNSORTED: 1206 case MAT_YES_NEW_DIAGONALS: 1207 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1208 break; 1209 case MAT_COLUMN_ORIENTED: 1210 a->roworiented = PETSC_FALSE; 1211 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1212 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1213 break; 1214 case MAT_IGNORE_OFF_PROC_ENTRIES: 1215 a->donotstash = PETSC_TRUE; 1216 break; 1217 case MAT_NO_NEW_DIAGONALS: 1218 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1219 case MAT_USE_HASH_TABLE: 1220 a->ht_flag = PETSC_TRUE; 1221 break; 1222 case MAT_NOT_SYMMETRIC: 1223 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1224 case MAT_HERMITIAN: 1225 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1226 case MAT_SYMMETRIC: 1227 case MAT_STRUCTURALLY_SYMMETRIC: 1228 case MAT_NOT_HERMITIAN: 1229 case MAT_SYMMETRY_ETERNAL: 1230 case MAT_NOT_SYMMETRY_ETERNAL: 1231 break; 1232 default: 1233 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1234 } 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1240 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1241 { 1242 PetscErrorCode ierr; 1243 PetscFunctionBegin; 1244 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1250 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1251 { 1252 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1253 Mat a=baij->A, b=baij->B; 1254 PetscErrorCode ierr; 1255 PetscInt nv,m,n; 1256 PetscTruth flg; 1257 1258 PetscFunctionBegin; 1259 if (ll != rr){ 1260 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1261 if (!flg) 1262 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1263 } 1264 if (!ll) PetscFunctionReturn(0); 1265 1266 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1267 if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1268 1269 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1270 if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1271 1272 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1273 1274 /* left diagonalscale the off-diagonal part */ 1275 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1276 1277 /* scale the diagonal part */ 1278 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1279 1280 /* right diagonalscale the off-diagonal part */ 1281 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1282 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1288 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1289 { 1290 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1291 MPI_Comm comm = A->comm; 1292 static PetscTruth called = PETSC_FALSE; 1293 PetscErrorCode ierr; 1294 1295 PetscFunctionBegin; 1296 if (!a->rank) { 1297 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1298 } 1299 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1300 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1301 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1307 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1308 { 1309 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatEqual_MPISBAIJ" 1321 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1322 { 1323 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1324 Mat a,b,c,d; 1325 PetscTruth flg; 1326 PetscErrorCode ierr; 1327 1328 PetscFunctionBegin; 1329 a = matA->A; b = matA->B; 1330 c = matB->A; d = matB->B; 1331 1332 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1333 if (flg) { 1334 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1335 } 1336 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1342 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1343 { 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1353 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1354 { 1355 PetscErrorCode ierr; 1356 PetscInt i; 1357 PetscTruth flg; 1358 1359 PetscFunctionBegin; 1360 for (i=0; i<n; i++) { 1361 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1362 if (!flg) { 1363 SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1364 } 1365 } 1366 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 1371 /* -------------------------------------------------------------------*/ 1372 static struct _MatOps MatOps_Values = { 1373 MatSetValues_MPISBAIJ, 1374 MatGetRow_MPISBAIJ, 1375 MatRestoreRow_MPISBAIJ, 1376 MatMult_MPISBAIJ, 1377 /* 4*/ MatMultAdd_MPISBAIJ, 1378 MatMultTranspose_MPISBAIJ, 1379 MatMultTransposeAdd_MPISBAIJ, 1380 0, 1381 0, 1382 0, 1383 /*10*/ 0, 1384 0, 1385 0, 1386 MatRelax_MPISBAIJ, 1387 MatTranspose_MPISBAIJ, 1388 /*15*/ MatGetInfo_MPISBAIJ, 1389 MatEqual_MPISBAIJ, 1390 MatGetDiagonal_MPISBAIJ, 1391 MatDiagonalScale_MPISBAIJ, 1392 MatNorm_MPISBAIJ, 1393 /*20*/ MatAssemblyBegin_MPISBAIJ, 1394 MatAssemblyEnd_MPISBAIJ, 1395 0, 1396 MatSetOption_MPISBAIJ, 1397 MatZeroEntries_MPISBAIJ, 1398 /*25*/ 0, 1399 0, 1400 0, 1401 0, 1402 0, 1403 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1404 0, 1405 0, 1406 0, 1407 0, 1408 /*35*/ MatDuplicate_MPISBAIJ, 1409 0, 1410 0, 1411 0, 1412 0, 1413 /*40*/ 0, 1414 MatGetSubMatrices_MPISBAIJ, 1415 MatIncreaseOverlap_MPISBAIJ, 1416 MatGetValues_MPISBAIJ, 1417 0, 1418 /*45*/ MatPrintHelp_MPISBAIJ, 1419 MatScale_MPISBAIJ, 1420 0, 1421 0, 1422 0, 1423 /*50*/ 0, 1424 0, 1425 0, 1426 0, 1427 0, 1428 /*55*/ 0, 1429 0, 1430 MatSetUnfactored_MPISBAIJ, 1431 0, 1432 MatSetValuesBlocked_MPISBAIJ, 1433 /*60*/ 0, 1434 0, 1435 0, 1436 MatGetPetscMaps_Petsc, 1437 0, 1438 /*65*/ 0, 1439 0, 1440 0, 1441 0, 1442 0, 1443 /*70*/ MatGetRowMax_MPISBAIJ, 1444 0, 1445 0, 1446 0, 1447 0, 1448 /*75*/ 0, 1449 0, 1450 0, 1451 0, 1452 0, 1453 /*80*/ 0, 1454 0, 1455 0, 1456 0, 1457 MatLoad_MPISBAIJ, 1458 /*85*/ 0, 1459 0, 1460 0, 1461 0, 1462 0, 1463 /*90*/ 0, 1464 0, 1465 0, 1466 0, 1467 0, 1468 /*95*/ 0, 1469 0, 1470 0, 1471 0}; 1472 1473 1474 EXTERN_C_BEGIN 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1477 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1478 { 1479 PetscFunctionBegin; 1480 *a = ((Mat_MPISBAIJ *)A->data)->A; 1481 *iscopy = PETSC_FALSE; 1482 PetscFunctionReturn(0); 1483 } 1484 EXTERN_C_END 1485 1486 EXTERN_C_BEGIN 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1489 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1490 { 1491 Mat_MPISBAIJ *b; 1492 PetscErrorCode ierr; 1493 PetscInt i,mbs,Mbs; 1494 1495 PetscFunctionBegin; 1496 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1497 1498 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1499 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1500 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1501 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1502 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1503 if (d_nnz) { 1504 for (i=0; i<B->m/bs; i++) { 1505 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1506 } 1507 } 1508 if (o_nnz) { 1509 for (i=0; i<B->m/bs; i++) { 1510 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1511 } 1512 } 1513 B->preallocated = PETSC_TRUE; 1514 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1515 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1516 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1517 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1518 1519 b = (Mat_MPISBAIJ*)B->data; 1520 mbs = B->m/bs; 1521 Mbs = B->M/bs; 1522 if (mbs*bs != B->m) { 1523 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs); 1524 } 1525 1526 B->bs = bs; 1527 b->bs2 = bs*bs; 1528 b->mbs = mbs; 1529 b->nbs = mbs; 1530 b->Mbs = Mbs; 1531 b->Nbs = Mbs; 1532 1533 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1534 b->rowners[0] = 0; 1535 for (i=2; i<=b->size; i++) { 1536 b->rowners[i] += b->rowners[i-1]; 1537 } 1538 b->rstart = b->rowners[b->rank]; 1539 b->rend = b->rowners[b->rank+1]; 1540 b->cstart = b->rstart; 1541 b->cend = b->rend; 1542 for (i=0; i<=b->size; i++) { 1543 b->rowners_bs[i] = b->rowners[i]*bs; 1544 } 1545 b->rstart_bs = b-> rstart*bs; 1546 b->rend_bs = b->rend*bs; 1547 1548 b->cstart_bs = b->cstart*bs; 1549 b->cend_bs = b->cend*bs; 1550 1551 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr); 1552 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1553 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1554 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1555 1556 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr); 1557 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1558 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1559 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1560 1561 /* build cache for off array entries formed */ 1562 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1563 1564 PetscFunctionReturn(0); 1565 } 1566 EXTERN_C_END 1567 1568 /*MC 1569 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1570 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1571 1572 Options Database Keys: 1573 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1574 1575 Level: beginner 1576 1577 .seealso: MatCreateMPISBAIJ 1578 M*/ 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatCreate_MPISBAIJ" 1583 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B) 1584 { 1585 Mat_MPISBAIJ *b; 1586 PetscErrorCode ierr; 1587 PetscTruth flg; 1588 1589 PetscFunctionBegin; 1590 1591 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1592 B->data = (void*)b; 1593 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1594 1595 B->ops->destroy = MatDestroy_MPISBAIJ; 1596 B->ops->view = MatView_MPISBAIJ; 1597 B->mapping = 0; 1598 B->factor = 0; 1599 B->assembled = PETSC_FALSE; 1600 1601 B->insertmode = NOT_SET_VALUES; 1602 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1603 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1604 1605 /* build local table of row and column ownerships */ 1606 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1607 b->cowners = b->rowners + b->size + 2; 1608 b->rowners_bs = b->cowners + b->size + 2; 1609 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1610 1611 /* build cache for off array entries formed */ 1612 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1613 b->donotstash = PETSC_FALSE; 1614 b->colmap = PETSC_NULL; 1615 b->garray = PETSC_NULL; 1616 b->roworiented = PETSC_TRUE; 1617 1618 #if defined(PETSC_USE_MAT_SINGLE) 1619 /* stuff for MatSetValues_XXX in single precision */ 1620 b->setvalueslen = 0; 1621 b->setvaluescopy = PETSC_NULL; 1622 #endif 1623 1624 /* stuff used in block assembly */ 1625 b->barray = 0; 1626 1627 /* stuff used for matrix vector multiply */ 1628 b->lvec = 0; 1629 b->Mvctx = 0; 1630 b->slvec0 = 0; 1631 b->slvec0b = 0; 1632 b->slvec1 = 0; 1633 b->slvec1a = 0; 1634 b->slvec1b = 0; 1635 b->sMvctx = 0; 1636 1637 /* stuff for MatGetRow() */ 1638 b->rowindices = 0; 1639 b->rowvalues = 0; 1640 b->getrowactive = PETSC_FALSE; 1641 1642 /* hash table stuff */ 1643 b->ht = 0; 1644 b->hd = 0; 1645 b->ht_size = 0; 1646 b->ht_flag = PETSC_FALSE; 1647 b->ht_fact = 0; 1648 b->ht_total_ct = 0; 1649 b->ht_insert_ct = 0; 1650 1651 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1652 if (flg) { 1653 PetscReal fact = 1.39; 1654 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1655 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1656 if (fact <= 1.0) fact = 1.39; 1657 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1658 ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 1659 } 1660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1661 "MatStoreValues_MPISBAIJ", 1662 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1664 "MatRetrieveValues_MPISBAIJ", 1665 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1667 "MatGetDiagonalBlock_MPISBAIJ", 1668 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1670 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1671 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1672 B->symmetric = PETSC_TRUE; 1673 B->structurally_symmetric = PETSC_TRUE; 1674 B->symmetric_set = PETSC_TRUE; 1675 B->structurally_symmetric_set = PETSC_TRUE; 1676 PetscFunctionReturn(0); 1677 } 1678 EXTERN_C_END 1679 1680 /*MC 1681 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1682 1683 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1684 and MATMPISBAIJ otherwise. 1685 1686 Options Database Keys: 1687 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1688 1689 Level: beginner 1690 1691 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1692 M*/ 1693 1694 EXTERN_C_BEGIN 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatCreate_SBAIJ" 1697 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A) 1698 { 1699 PetscErrorCode ierr; 1700 PetscMPIInt size; 1701 1702 PetscFunctionBegin; 1703 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1704 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1705 if (size == 1) { 1706 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1707 } else { 1708 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1709 } 1710 PetscFunctionReturn(0); 1711 } 1712 EXTERN_C_END 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1716 /*@C 1717 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1718 the user should preallocate the matrix storage by setting the parameters 1719 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1720 performance can be increased by more than a factor of 50. 1721 1722 Collective on Mat 1723 1724 Input Parameters: 1725 + A - the matrix 1726 . bs - size of blockk 1727 . d_nz - number of block nonzeros per block row in diagonal portion of local 1728 submatrix (same for all local rows) 1729 . d_nnz - array containing the number of block nonzeros in the various block rows 1730 in the upper triangular and diagonal part of the in diagonal portion of the local 1731 (possibly different for each block row) or PETSC_NULL. You must leave room 1732 for the diagonal entry even if it is zero. 1733 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1734 submatrix (same for all local rows). 1735 - o_nnz - array containing the number of nonzeros in the various block rows of the 1736 off-diagonal portion of the local submatrix (possibly different for 1737 each block row) or PETSC_NULL. 1738 1739 1740 Options Database Keys: 1741 . -mat_no_unroll - uses code that does not unroll the loops in the 1742 block calculations (much slower) 1743 . -mat_block_size - size of the blocks to use 1744 1745 Notes: 1746 1747 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1748 than it must be used on all processors that share the object for that argument. 1749 1750 If the *_nnz parameter is given then the *_nz parameter is ignored 1751 1752 Storage Information: 1753 For a square global matrix we define each processor's diagonal portion 1754 to be its local rows and the corresponding columns (a square submatrix); 1755 each processor's off-diagonal portion encompasses the remainder of the 1756 local matrix (a rectangular submatrix). 1757 1758 The user can specify preallocated storage for the diagonal part of 1759 the local submatrix with either d_nz or d_nnz (not both). Set 1760 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1761 memory allocation. Likewise, specify preallocated storage for the 1762 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1763 1764 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1765 the figure below we depict these three local rows and all columns (0-11). 1766 1767 .vb 1768 0 1 2 3 4 5 6 7 8 9 10 11 1769 ------------------- 1770 row 3 | o o o d d d o o o o o o 1771 row 4 | o o o d d d o o o o o o 1772 row 5 | o o o d d d o o o o o o 1773 ------------------- 1774 .ve 1775 1776 Thus, any entries in the d locations are stored in the d (diagonal) 1777 submatrix, and any entries in the o locations are stored in the 1778 o (off-diagonal) submatrix. Note that the d matrix is stored in 1779 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1780 1781 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1782 plus the diagonal part of the d matrix, 1783 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1784 In general, for PDE problems in which most nonzeros are near the diagonal, 1785 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1786 or you will get TERRIBLE performance; see the users' manual chapter on 1787 matrices. 1788 1789 Level: intermediate 1790 1791 .keywords: matrix, block, aij, compressed row, sparse, parallel 1792 1793 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1794 @*/ 1795 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1796 { 1797 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 1798 1799 PetscFunctionBegin; 1800 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1801 if (f) { 1802 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatCreateMPISBAIJ" 1809 /*@C 1810 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1811 (block compressed row). For good matrix assembly performance 1812 the user should preallocate the matrix storage by setting the parameters 1813 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1814 performance can be increased by more than a factor of 50. 1815 1816 Collective on MPI_Comm 1817 1818 Input Parameters: 1819 + comm - MPI communicator 1820 . bs - size of blockk 1821 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1822 This value should be the same as the local size used in creating the 1823 y vector for the matrix-vector product y = Ax. 1824 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1825 This value should be the same as the local size used in creating the 1826 x vector for the matrix-vector product y = Ax. 1827 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1828 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1829 . d_nz - number of block nonzeros per block row in diagonal portion of local 1830 submatrix (same for all local rows) 1831 . d_nnz - array containing the number of block nonzeros in the various block rows 1832 in the upper triangular portion of the in diagonal portion of the local 1833 (possibly different for each block block row) or PETSC_NULL. 1834 You must leave room for the diagonal entry even if it is zero. 1835 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1836 submatrix (same for all local rows). 1837 - o_nnz - array containing the number of nonzeros in the various block rows of the 1838 off-diagonal portion of the local submatrix (possibly different for 1839 each block row) or PETSC_NULL. 1840 1841 Output Parameter: 1842 . A - the matrix 1843 1844 Options Database Keys: 1845 . -mat_no_unroll - uses code that does not unroll the loops in the 1846 block calculations (much slower) 1847 . -mat_block_size - size of the blocks to use 1848 . -mat_mpi - use the parallel matrix data structures even on one processor 1849 (defaults to using SeqBAIJ format on one processor) 1850 1851 Notes: 1852 The number of rows and columns must be divisible by blocksize. 1853 1854 The user MUST specify either the local or global matrix dimensions 1855 (possibly both). 1856 1857 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1858 than it must be used on all processors that share the object for that argument. 1859 1860 If the *_nnz parameter is given then the *_nz parameter is ignored 1861 1862 Storage Information: 1863 For a square global matrix we define each processor's diagonal portion 1864 to be its local rows and the corresponding columns (a square submatrix); 1865 each processor's off-diagonal portion encompasses the remainder of the 1866 local matrix (a rectangular submatrix). 1867 1868 The user can specify preallocated storage for the diagonal part of 1869 the local submatrix with either d_nz or d_nnz (not both). Set 1870 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1871 memory allocation. Likewise, specify preallocated storage for the 1872 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1873 1874 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1875 the figure below we depict these three local rows and all columns (0-11). 1876 1877 .vb 1878 0 1 2 3 4 5 6 7 8 9 10 11 1879 ------------------- 1880 row 3 | o o o d d d o o o o o o 1881 row 4 | o o o d d d o o o o o o 1882 row 5 | o o o d d d o o o o o o 1883 ------------------- 1884 .ve 1885 1886 Thus, any entries in the d locations are stored in the d (diagonal) 1887 submatrix, and any entries in the o locations are stored in the 1888 o (off-diagonal) submatrix. Note that the d matrix is stored in 1889 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1890 1891 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1892 plus the diagonal part of the d matrix, 1893 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1894 In general, for PDE problems in which most nonzeros are near the diagonal, 1895 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1896 or you will get TERRIBLE performance; see the users' manual chapter on 1897 matrices. 1898 1899 Level: intermediate 1900 1901 .keywords: matrix, block, aij, compressed row, sparse, parallel 1902 1903 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1904 @*/ 1905 1906 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1907 { 1908 PetscErrorCode ierr; 1909 PetscMPIInt size; 1910 1911 PetscFunctionBegin; 1912 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1914 if (size > 1) { 1915 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1916 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1917 } else { 1918 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1919 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1920 } 1921 PetscFunctionReturn(0); 1922 } 1923 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1927 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1928 { 1929 Mat mat; 1930 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1931 PetscErrorCode ierr; 1932 PetscInt len=0,nt,bs=matin->bs,mbs=oldmat->mbs; 1933 PetscScalar *array; 1934 1935 PetscFunctionBegin; 1936 *newmat = 0; 1937 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1938 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1939 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1940 1941 mat->factor = matin->factor; 1942 mat->preallocated = PETSC_TRUE; 1943 mat->assembled = PETSC_TRUE; 1944 mat->insertmode = NOT_SET_VALUES; 1945 1946 a = (Mat_MPISBAIJ*)mat->data; 1947 mat->bs = matin->bs; 1948 a->bs2 = oldmat->bs2; 1949 a->mbs = oldmat->mbs; 1950 a->nbs = oldmat->nbs; 1951 a->Mbs = oldmat->Mbs; 1952 a->Nbs = oldmat->Nbs; 1953 1954 a->rstart = oldmat->rstart; 1955 a->rend = oldmat->rend; 1956 a->cstart = oldmat->cstart; 1957 a->cend = oldmat->cend; 1958 a->size = oldmat->size; 1959 a->rank = oldmat->rank; 1960 a->donotstash = oldmat->donotstash; 1961 a->roworiented = oldmat->roworiented; 1962 a->rowindices = 0; 1963 a->rowvalues = 0; 1964 a->getrowactive = PETSC_FALSE; 1965 a->barray = 0; 1966 a->rstart_bs = oldmat->rstart_bs; 1967 a->rend_bs = oldmat->rend_bs; 1968 a->cstart_bs = oldmat->cstart_bs; 1969 a->cend_bs = oldmat->cend_bs; 1970 1971 /* hash table stuff */ 1972 a->ht = 0; 1973 a->hd = 0; 1974 a->ht_size = 0; 1975 a->ht_flag = oldmat->ht_flag; 1976 a->ht_fact = oldmat->ht_fact; 1977 a->ht_total_ct = 0; 1978 a->ht_insert_ct = 0; 1979 1980 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1981 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1982 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 1983 if (oldmat->colmap) { 1984 #if defined (PETSC_USE_CTABLE) 1985 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1986 #else 1987 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1988 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1989 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1990 #endif 1991 } else a->colmap = 0; 1992 1993 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1994 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1995 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1996 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 1997 } else a->garray = 0; 1998 1999 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2000 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2001 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2002 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2003 2004 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2005 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2006 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2007 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2008 2009 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2010 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2011 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2012 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2013 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2014 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2015 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2016 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2017 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2018 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2019 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2020 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2021 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2022 2023 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2024 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2025 a->sMvctx = oldmat->sMvctx; 2026 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2027 2028 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2029 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2030 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2031 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2032 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2033 *newmat = mat; 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #include "petscsys.h" 2038 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatLoad_MPISBAIJ" 2041 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2042 { 2043 Mat A; 2044 PetscErrorCode ierr; 2045 PetscInt i,nz,j,rstart,rend; 2046 PetscScalar *vals,*buf; 2047 MPI_Comm comm = ((PetscObject)viewer)->comm; 2048 MPI_Status status; 2049 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners; 2050 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2051 PetscInt *locrowlens,*procsnz = 0,jj,*mycols,*ibuf; 2052 PetscInt bs=1,Mbs,mbs,extra_rows; 2053 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2054 PetscInt dcount,kmax,k,nzcount,tmp; 2055 int fd; 2056 2057 PetscFunctionBegin; 2058 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2059 2060 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2062 if (!rank) { 2063 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2064 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2065 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2066 if (header[3] < 0) { 2067 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2068 } 2069 } 2070 2071 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2072 M = header[1]; N = header[2]; 2073 2074 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2075 2076 /* 2077 This code adds extra rows to make sure the number of rows is 2078 divisible by the blocksize 2079 */ 2080 Mbs = M/bs; 2081 extra_rows = bs - M + bs*(Mbs); 2082 if (extra_rows == bs) extra_rows = 0; 2083 else Mbs++; 2084 if (extra_rows &&!rank) { 2085 ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2086 } 2087 2088 /* determine ownership of all rows */ 2089 mbs = Mbs/size + ((Mbs % size) > rank); 2090 m = mbs*bs; 2091 ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2092 browners = rowners + size + 1; 2093 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2094 rowners[0] = 0; 2095 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2096 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2097 rstart = rowners[rank]; 2098 rend = rowners[rank+1]; 2099 2100 /* distribute row lengths to all processors */ 2101 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2102 if (!rank) { 2103 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2104 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2105 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2106 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2107 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2108 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2109 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2110 } else { 2111 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2112 } 2113 2114 if (!rank) { /* procs[0] */ 2115 /* calculate the number of nonzeros on each processor */ 2116 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2117 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2118 for (i=0; i<size; i++) { 2119 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2120 procsnz[i] += rowlengths[j]; 2121 } 2122 } 2123 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2124 2125 /* determine max buffer needed and allocate it */ 2126 maxnz = 0; 2127 for (i=0; i<size; i++) { 2128 maxnz = PetscMax(maxnz,procsnz[i]); 2129 } 2130 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2131 2132 /* read in my part of the matrix column indices */ 2133 nz = procsnz[0]; 2134 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2135 mycols = ibuf; 2136 if (size == 1) nz -= extra_rows; 2137 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2138 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2139 2140 /* read in every ones (except the last) and ship off */ 2141 for (i=1; i<size-1; i++) { 2142 nz = procsnz[i]; 2143 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2144 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2145 } 2146 /* read in the stuff for the last proc */ 2147 if (size != 1) { 2148 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2149 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2150 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2151 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2152 } 2153 ierr = PetscFree(cols);CHKERRQ(ierr); 2154 } else { /* procs[i], i>0 */ 2155 /* determine buffer space needed for message */ 2156 nz = 0; 2157 for (i=0; i<m; i++) { 2158 nz += locrowlens[i]; 2159 } 2160 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2161 mycols = ibuf; 2162 /* receive message of column indices*/ 2163 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2164 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2165 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2166 } 2167 2168 /* loop over local rows, determining number of off diagonal entries */ 2169 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2170 odlens = dlens + (rend-rstart); 2171 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2172 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2173 masked1 = mask + Mbs; 2174 masked2 = masked1 + Mbs; 2175 rowcount = 0; nzcount = 0; 2176 for (i=0; i<mbs; i++) { 2177 dcount = 0; 2178 odcount = 0; 2179 for (j=0; j<bs; j++) { 2180 kmax = locrowlens[rowcount]; 2181 for (k=0; k<kmax; k++) { 2182 tmp = mycols[nzcount++]/bs; /* block col. index */ 2183 if (!mask[tmp]) { 2184 mask[tmp] = 1; 2185 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2186 else masked1[dcount++] = tmp; /* entry in diag portion */ 2187 } 2188 } 2189 rowcount++; 2190 } 2191 2192 dlens[i] = dcount; /* d_nzz[i] */ 2193 odlens[i] = odcount; /* o_nzz[i] */ 2194 2195 /* zero out the mask elements we set */ 2196 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2197 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2198 } 2199 2200 /* create our matrix */ 2201 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2202 ierr = MatSetType(A,type);CHKERRQ(ierr); 2203 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2204 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2205 2206 if (!rank) { 2207 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2208 /* read in my part of the matrix numerical values */ 2209 nz = procsnz[0]; 2210 vals = buf; 2211 mycols = ibuf; 2212 if (size == 1) nz -= extra_rows; 2213 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2214 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2215 2216 /* insert into matrix */ 2217 jj = rstart*bs; 2218 for (i=0; i<m; i++) { 2219 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2220 mycols += locrowlens[i]; 2221 vals += locrowlens[i]; 2222 jj++; 2223 } 2224 2225 /* read in other processors (except the last one) and ship out */ 2226 for (i=1; i<size-1; i++) { 2227 nz = procsnz[i]; 2228 vals = buf; 2229 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2230 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2231 } 2232 /* the last proc */ 2233 if (size != 1){ 2234 nz = procsnz[i] - extra_rows; 2235 vals = buf; 2236 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2237 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2238 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2239 } 2240 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2241 2242 } else { 2243 /* receive numeric values */ 2244 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2245 2246 /* receive message of values*/ 2247 vals = buf; 2248 mycols = ibuf; 2249 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2250 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2251 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2252 2253 /* insert into matrix */ 2254 jj = rstart*bs; 2255 for (i=0; i<m; i++) { 2256 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2257 mycols += locrowlens[i]; 2258 vals += locrowlens[i]; 2259 jj++; 2260 } 2261 } 2262 2263 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2264 ierr = PetscFree(buf);CHKERRQ(ierr); 2265 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2266 ierr = PetscFree(rowners);CHKERRQ(ierr); 2267 ierr = PetscFree(dlens);CHKERRQ(ierr); 2268 ierr = PetscFree(mask);CHKERRQ(ierr); 2269 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2270 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2271 *newmat = A; 2272 PetscFunctionReturn(0); 2273 } 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2277 /*XXXXX@ 2278 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2279 2280 Input Parameters: 2281 . mat - the matrix 2282 . fact - factor 2283 2284 Collective on Mat 2285 2286 Level: advanced 2287 2288 Notes: 2289 This can also be set by the command line option: -mat_use_hash_table fact 2290 2291 .keywords: matrix, hashtable, factor, HT 2292 2293 .seealso: MatSetOption() 2294 @XXXXX*/ 2295 2296 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2299 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2300 { 2301 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2302 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2303 PetscReal atmp; 2304 PetscReal *work,*svalues,*rvalues; 2305 PetscErrorCode ierr; 2306 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2307 PetscMPIInt rank,size; 2308 PetscInt *rowners_bs,dest,count,source; 2309 PetscScalar *va; 2310 MatScalar *ba; 2311 MPI_Status stat; 2312 2313 PetscFunctionBegin; 2314 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2315 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2316 2317 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2318 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2319 2320 bs = A->bs; 2321 mbs = a->mbs; 2322 Mbs = a->Mbs; 2323 ba = b->a; 2324 bi = b->i; 2325 bj = b->j; 2326 2327 /* find ownerships */ 2328 rowners_bs = a->rowners_bs; 2329 2330 /* each proc creates an array to be distributed */ 2331 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2332 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2333 2334 /* row_max for B */ 2335 if (rank != size-1){ 2336 for (i=0; i<mbs; i++) { 2337 ncols = bi[1] - bi[0]; bi++; 2338 brow = bs*i; 2339 for (j=0; j<ncols; j++){ 2340 bcol = bs*(*bj); 2341 for (kcol=0; kcol<bs; kcol++){ 2342 col = bcol + kcol; /* local col index */ 2343 col += rowners_bs[rank+1]; /* global col index */ 2344 for (krow=0; krow<bs; krow++){ 2345 atmp = PetscAbsScalar(*ba); ba++; 2346 row = brow + krow; /* local row index */ 2347 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2348 if (work[col] < atmp) work[col] = atmp; 2349 } 2350 } 2351 bj++; 2352 } 2353 } 2354 2355 /* send values to its owners */ 2356 for (dest=rank+1; dest<size; dest++){ 2357 svalues = work + rowners_bs[dest]; 2358 count = rowners_bs[dest+1]-rowners_bs[dest]; 2359 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2360 } 2361 } 2362 2363 /* receive values */ 2364 if (rank){ 2365 rvalues = work; 2366 count = rowners_bs[rank+1]-rowners_bs[rank]; 2367 for (source=0; source<rank; source++){ 2368 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2369 /* process values */ 2370 for (i=0; i<count; i++){ 2371 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2372 } 2373 } 2374 } 2375 2376 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2377 ierr = PetscFree(work);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatRelax_MPISBAIJ" 2383 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2384 { 2385 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2386 PetscErrorCode ierr; 2387 PetscInt mbs=mat->mbs,bs=matin->bs; 2388 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2389 Vec bb1; 2390 2391 PetscFunctionBegin; 2392 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2393 if (bs > 1) 2394 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2395 2396 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2397 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2398 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2399 its--; 2400 } 2401 2402 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2403 while (its--){ 2404 2405 /* lower triangular part: slvec0b = - B^T*xx */ 2406 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2407 2408 /* copy xx into slvec0a */ 2409 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2410 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2411 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2412 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2413 2414 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2415 2416 /* copy bb into slvec1a */ 2417 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2418 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2419 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2420 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2421 2422 /* set slvec1b = 0 */ 2423 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2424 2425 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2426 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2427 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2428 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2429 2430 /* upper triangular part: bb1 = bb1 - B*x */ 2431 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2432 2433 /* local diagonal sweep */ 2434 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2435 } 2436 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2437 } else { 2438 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2439 } 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2445 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2446 { 2447 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2448 PetscErrorCode ierr; 2449 PetscScalar mone=-1.0; 2450 Vec lvec1,bb1; 2451 2452 PetscFunctionBegin; 2453 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2454 if (matin->bs > 1) 2455 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2456 2457 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2458 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2459 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2460 its--; 2461 } 2462 2463 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2464 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2465 while (its--){ 2466 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2467 2468 /* lower diagonal part: bb1 = bb - B^T*xx */ 2469 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2470 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2471 2472 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2473 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2474 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2475 2476 /* upper diagonal part: bb1 = bb1 - B*x */ 2477 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2478 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2479 2480 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2481 2482 /* diagonal sweep */ 2483 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2484 } 2485 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2486 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2487 } else { 2488 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2489 } 2490 PetscFunctionReturn(0); 2491 } 2492 2493