1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 #include "mpisbaij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat); 9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int); 10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat); 18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec); 21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 22 23 /* UGLY, ugly, ugly 24 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 25 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 26 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 27 converts the entries into single precision and then calls ..._MatScalar() to put them 28 into the single precision data structures. 29 */ 30 #if defined(PETSC_USE_MAT_SINGLE) 31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 #else 37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 38 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 40 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 42 #endif 43 44 EXTERN_C_BEGIN 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 48 { 49 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 EXTERN_C_END 58 59 EXTERN_C_BEGIN 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 63 { 64 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 69 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 74 75 #define CHUNKSIZE 10 76 77 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 78 { \ 79 \ 80 brow = row/bs; \ 81 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 82 rmax = aimax[brow]; nrow = ailen[brow]; \ 83 bcol = col/bs; \ 84 ridx = row % bs; cidx = col % bs; \ 85 low = 0; high = nrow; \ 86 while (high-low > 3) { \ 87 t = (low+high)/2; \ 88 if (rp[t] > bcol) high = t; \ 89 else low = t; \ 90 } \ 91 for (_i=low; _i<high; _i++) { \ 92 if (rp[_i] > bcol) break; \ 93 if (rp[_i] == bcol) { \ 94 bap = ap + bs2*_i + bs*cidx + ridx; \ 95 if (addv == ADD_VALUES) *bap += value; \ 96 else *bap = value; \ 97 goto a_noinsert; \ 98 } \ 99 } \ 100 if (a->nonew == 1) goto a_noinsert; \ 101 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 102 if (nrow >= rmax) { \ 103 /* there is no extra room in row, therefore enlarge */ \ 104 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 105 MatScalar *new_a; \ 106 \ 107 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 108 \ 109 /* malloc new storage space */ \ 110 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 111 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 112 new_j = (int*)(new_a + bs2*new_nz); \ 113 new_i = new_j + new_nz; \ 114 \ 115 /* copy over old data into new slots */ \ 116 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 117 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 118 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 119 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 120 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 121 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 124 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 125 /* free up old matrix storage */ \ 126 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 127 if (!a->singlemalloc) { \ 128 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 129 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 130 } \ 131 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 132 a->singlemalloc = PETSC_TRUE; \ 133 \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 136 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 137 a->maxnz += bs2*CHUNKSIZE; \ 138 a->reallocs++; \ 139 a->nz++; \ 140 } \ 141 N = nrow++ - 1; \ 142 /* shift up all the later entries in this row */ \ 143 for (ii=N; ii>=_i; ii--) { \ 144 rp[ii+1] = rp[ii]; \ 145 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 146 } \ 147 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 148 rp[_i] = bcol; \ 149 ap[bs2*_i + bs*cidx + ridx] = value; \ 150 a_noinsert:; \ 151 ailen[brow] = nrow; \ 152 } 153 #ifndef MatSetValues_SeqBAIJ_B_Private 154 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 155 { \ 156 brow = row/bs; \ 157 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 158 rmax = bimax[brow]; nrow = bilen[brow]; \ 159 bcol = col/bs; \ 160 ridx = row % bs; cidx = col % bs; \ 161 low = 0; high = nrow; \ 162 while (high-low > 3) { \ 163 t = (low+high)/2; \ 164 if (rp[t] > bcol) high = t; \ 165 else low = t; \ 166 } \ 167 for (_i=low; _i<high; _i++) { \ 168 if (rp[_i] > bcol) break; \ 169 if (rp[_i] == bcol) { \ 170 bap = ap + bs2*_i + bs*cidx + ridx; \ 171 if (addv == ADD_VALUES) *bap += value; \ 172 else *bap = value; \ 173 goto b_noinsert; \ 174 } \ 175 } \ 176 if (b->nonew == 1) goto b_noinsert; \ 177 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 178 if (nrow >= rmax) { \ 179 /* there is no extra room in row, therefore enlarge */ \ 180 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 181 MatScalar *new_a; \ 182 \ 183 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 184 \ 185 /* malloc new storage space */ \ 186 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 187 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 188 new_j = (int*)(new_a + bs2*new_nz); \ 189 new_i = new_j + new_nz; \ 190 \ 191 /* copy over old data into new slots */ \ 192 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 193 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 194 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 195 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 196 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 197 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 200 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 201 /* free up old matrix storage */ \ 202 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 203 if (!b->singlemalloc) { \ 204 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 205 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 206 } \ 207 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 208 b->singlemalloc = PETSC_TRUE; \ 209 \ 210 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 211 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 212 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 213 b->maxnz += bs2*CHUNKSIZE; \ 214 b->reallocs++; \ 215 b->nz++; \ 216 } \ 217 N = nrow++ - 1; \ 218 /* shift up all the later entries in this row */ \ 219 for (ii=N; ii>=_i; ii--) { \ 220 rp[ii+1] = rp[ii]; \ 221 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 222 } \ 223 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 224 rp[_i] = bcol; \ 225 ap[bs2*_i + bs*cidx + ridx] = value; \ 226 b_noinsert:; \ 227 bilen[brow] = nrow; \ 228 } 229 #endif 230 231 #if defined(PETSC_USE_MAT_SINGLE) 232 #undef __FUNCT__ 233 #define __FUNCT__ "MatSetValues_MPISBAIJ" 234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 235 { 236 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 237 PetscErrorCode ierr; 238 int i,N = m*n; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 PetscErrorCode ierr; 262 int i,N = m*n*b->bs2; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 268 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 for (i=0; i<N; i++) { 273 vsingle[i] = v[i]; 274 } 275 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 281 PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 282 { 283 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 284 PetscErrorCode ierr; 285 int i,N = m*n; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 SETERRQ(1,"Function not yet written for SBAIJ format"); 290 /* PetscFunctionReturn(0); */ 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 295 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 296 { 297 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 298 PetscErrorCode ierr; 299 int i,N = m*n*b->bs2; 300 MatScalar *vsingle; 301 302 PetscFunctionBegin; 303 SETERRQ(1,"Function not yet written for SBAIJ format"); 304 /* PetscFunctionReturn(0); */ 305 } 306 #endif 307 308 /* Only add/insert a(i,j) with i<=j (blocks). 309 Any a(i,j) with i>j input by user is ingored. 310 */ 311 #undef __FUNCT__ 312 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 313 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 314 { 315 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 316 MatScalar value; 317 PetscTruth roworiented = baij->roworiented; 318 PetscErrorCode ierr; 319 int i,j,row,col; 320 int rstart_orig=baij->rstart_bs; 321 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 322 int cend_orig=baij->cend_bs,bs=baij->bs; 323 324 /* Some Variables required in the macro */ 325 Mat A = baij->A; 326 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 327 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 328 MatScalar *aa=a->a; 329 330 Mat B = baij->B; 331 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 332 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 333 MatScalar *ba=b->a; 334 335 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 336 int low,high,t,ridx,cidx,bs2=a->bs2; 337 MatScalar *ap,*bap; 338 339 /* for stash */ 340 int n_loc, *in_loc=0; 341 MatScalar *v_loc=0; 342 343 PetscFunctionBegin; 344 345 if(!baij->donotstash){ 346 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 347 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 348 } 349 350 for (i=0; i<m; i++) { 351 if (im[i] < 0) continue; 352 #if defined(PETSC_USE_BOPT_g) 353 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 354 #endif 355 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 356 row = im[i] - rstart_orig; /* local row index */ 357 for (j=0; j<n; j++) { 358 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 359 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 360 col = in[j] - cstart_orig; /* local col index */ 361 brow = row/bs; bcol = col/bs; 362 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 363 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 364 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 365 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 366 } else if (in[j] < 0) continue; 367 #if defined(PETSC_USE_BOPT_g) 368 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 369 #endif 370 else { /* off-diag entry (B) */ 371 if (mat->was_assembled) { 372 if (!baij->colmap) { 373 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 374 } 375 #if defined (PETSC_USE_CTABLE) 376 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 377 col = col - 1; 378 #else 379 col = baij->colmap[in[j]/bs] - 1; 380 #endif 381 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 382 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 383 col = in[j]; 384 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 385 B = baij->B; 386 b = (Mat_SeqBAIJ*)(B)->data; 387 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 388 ba=b->a; 389 } else col += in[j]%bs; 390 } else col = in[j]; 391 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 392 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 393 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 394 } 395 } 396 } else { /* off processor entry */ 397 if (!baij->donotstash) { 398 n_loc = 0; 399 for (j=0; j<n; j++){ 400 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 401 in_loc[n_loc] = in[j]; 402 if (roworiented) { 403 v_loc[n_loc] = v[i*n+j]; 404 } else { 405 v_loc[n_loc] = v[j*m+i]; 406 } 407 n_loc++; 408 } 409 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 410 } 411 } 412 } 413 414 if(!baij->donotstash){ 415 ierr = PetscFree(in_loc);CHKERRQ(ierr); 416 ierr = PetscFree(v_loc);CHKERRQ(ierr); 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 423 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 424 { 425 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 426 const MatScalar *value; 427 MatScalar *barray=baij->barray; 428 PetscTruth roworiented = baij->roworiented; 429 PetscErrorCode ierr; 430 int i,j,ii,jj,row,col,rstart=baij->rstart; 431 int rend=baij->rend,cstart=baij->cstart,stepval; 432 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 433 434 PetscFunctionBegin; 435 if(!barray) { 436 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 437 baij->barray = barray; 438 } 439 440 if (roworiented) { 441 stepval = (n-1)*bs; 442 } else { 443 stepval = (m-1)*bs; 444 } 445 for (i=0; i<m; i++) { 446 if (im[i] < 0) continue; 447 #if defined(PETSC_USE_BOPT_g) 448 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 449 #endif 450 if (im[i] >= rstart && im[i] < rend) { 451 row = im[i] - rstart; 452 for (j=0; j<n; j++) { 453 /* If NumCol = 1 then a copy is not required */ 454 if ((roworiented) && (n == 1)) { 455 barray = (MatScalar*) v + i*bs2; 456 } else if((!roworiented) && (m == 1)) { 457 barray = (MatScalar*) v + j*bs2; 458 } else { /* Here a copy is required */ 459 if (roworiented) { 460 value = v + i*(stepval+bs)*bs + j*bs; 461 } else { 462 value = v + j*(stepval+bs)*bs + i*bs; 463 } 464 for (ii=0; ii<bs; ii++,value+=stepval) { 465 for (jj=0; jj<bs; jj++) { 466 *barray++ = *value++; 467 } 468 } 469 barray -=bs2; 470 } 471 472 if (in[j] >= cstart && in[j] < cend){ 473 col = in[j] - cstart; 474 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 475 } 476 else if (in[j] < 0) continue; 477 #if defined(PETSC_USE_BOPT_g) 478 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 479 #endif 480 else { 481 if (mat->was_assembled) { 482 if (!baij->colmap) { 483 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 484 } 485 486 #if defined(PETSC_USE_BOPT_g) 487 #if defined (PETSC_USE_CTABLE) 488 { int data; 489 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 490 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 491 } 492 #else 493 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 494 #endif 495 #endif 496 #if defined (PETSC_USE_CTABLE) 497 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 498 col = (col - 1)/bs; 499 #else 500 col = (baij->colmap[in[j]] - 1)/bs; 501 #endif 502 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 503 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 504 col = in[j]; 505 } 506 } 507 else col = in[j]; 508 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509 } 510 } 511 } else { 512 if (!baij->donotstash) { 513 if (roworiented) { 514 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 515 } else { 516 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 517 } 518 } 519 } 520 } 521 PetscFunctionReturn(0); 522 } 523 524 #define HASH_KEY 0.6180339887 525 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 526 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 527 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 530 PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 531 { 532 PetscFunctionBegin; 533 SETERRQ(1,"Function not yet written for SBAIJ format"); 534 /* PetscFunctionReturn(0); */ 535 } 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 539 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 540 { 541 PetscFunctionBegin; 542 SETERRQ(1,"Function not yet written for SBAIJ format"); 543 /* PetscFunctionReturn(0); */ 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 552 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 553 554 PetscFunctionBegin; 555 for (i=0; i<m; i++) { 556 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 557 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 558 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 559 row = idxm[i] - bsrstart; 560 for (j=0; j<n; j++) { 561 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 562 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 563 if (idxn[j] >= bscstart && idxn[j] < bscend){ 564 col = idxn[j] - bscstart; 565 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 566 } else { 567 if (!baij->colmap) { 568 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 569 } 570 #if defined (PETSC_USE_CTABLE) 571 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 572 data --; 573 #else 574 data = baij->colmap[idxn[j]/bs]-1; 575 #endif 576 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 577 else { 578 col = data + idxn[j]%bs; 579 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 580 } 581 } 582 } 583 } else { 584 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatNorm_MPISBAIJ" 592 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 593 { 594 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 595 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 596 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 597 PetscErrorCode ierr; 598 PetscReal sum[2],*lnorm2; 599 600 PetscFunctionBegin; 601 if (baij->size == 1) { 602 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 603 } else { 604 if (type == NORM_FROBENIUS) { 605 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 606 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 607 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 608 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 609 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 610 /* 611 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 612 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 613 */ 614 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 615 /* 616 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 617 PetscSynchronizedFlush(mat->comm); */ 618 619 *norm = sqrt(sum[0] + 2*sum[1]); 620 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 621 } else { 622 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 /* 629 Creates the hash table, and sets the table 630 This table is created only once. 631 If new entried need to be added to the matrix 632 then the hash table has to be destroyed and 633 recreated. 634 */ 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 637 PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 638 { 639 PetscFunctionBegin; 640 SETERRQ(1,"Function not yet written for SBAIJ format"); 641 /* PetscFunctionReturn(0); */ 642 } 643 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 646 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 647 { 648 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 649 PetscErrorCode ierr; 650 int nstash,reallocs; 651 InsertMode addv; 652 653 PetscFunctionBegin; 654 if (baij->donotstash) { 655 PetscFunctionReturn(0); 656 } 657 658 /* make sure all processors are either in INSERTMODE or ADDMODE */ 659 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 660 if (addv == (ADD_VALUES|INSERT_VALUES)) { 661 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 662 } 663 mat->insertmode = addv; /* in case this processor had no cache */ 664 665 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 666 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 667 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 668 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 669 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 670 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 676 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 677 { 678 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 679 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 680 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 681 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 682 int *row,*col,other_disassembled; 683 PetscTruth r1,r2,r3; 684 MatScalar *val; 685 InsertMode addv = mat->insertmode; 686 687 PetscFunctionBegin; 688 689 if (!baij->donotstash) { 690 while (1) { 691 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 692 /* 693 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 694 PetscSynchronizedFlush(mat->comm); 695 */ 696 if (!flg) break; 697 698 for (i=0; i<n;) { 699 /* Now identify the consecutive vals belonging to the same row */ 700 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 701 if (j < n) ncols = j-i; 702 else ncols = n-i; 703 /* Now assemble all these values with a single function call */ 704 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 705 i = j; 706 } 707 } 708 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 709 /* Now process the block-stash. Since the values are stashed column-oriented, 710 set the roworiented flag to column oriented, and after MatSetValues() 711 restore the original flags */ 712 r1 = baij->roworiented; 713 r2 = a->roworiented; 714 r3 = b->roworiented; 715 baij->roworiented = PETSC_FALSE; 716 a->roworiented = PETSC_FALSE; 717 b->roworiented = PETSC_FALSE; 718 while (1) { 719 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 720 if (!flg) break; 721 722 for (i=0; i<n;) { 723 /* Now identify the consecutive vals belonging to the same row */ 724 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 725 if (j < n) ncols = j-i; 726 else ncols = n-i; 727 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 728 i = j; 729 } 730 } 731 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 732 baij->roworiented = r1; 733 a->roworiented = r2; 734 b->roworiented = r3; 735 } 736 737 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 738 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 739 740 /* determine if any processor has disassembled, if so we must 741 also disassemble ourselfs, in order that we may reassemble. */ 742 /* 743 if nonzero structure of submatrix B cannot change then we know that 744 no processor disassembled thus we can skip this stuff 745 */ 746 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 747 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 748 if (mat->was_assembled && !other_disassembled) { 749 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 750 } 751 } 752 753 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 754 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 755 } 756 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 757 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 758 759 #if defined(PETSC_USE_BOPT_g) 760 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 761 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 762 baij->ht_total_ct = 0; 763 baij->ht_insert_ct = 0; 764 } 765 #endif 766 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 767 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 768 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 769 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 770 } 771 772 if (baij->rowvalues) { 773 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 774 baij->rowvalues = 0; 775 } 776 777 PetscFunctionReturn(0); 778 } 779 780 #undef __FUNCT__ 781 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 782 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 783 { 784 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 785 PetscErrorCode ierr; 786 int bs = baij->bs,size = baij->size,rank = baij->rank; 787 PetscTruth iascii,isdraw; 788 PetscViewer sviewer; 789 PetscViewerFormat format; 790 791 PetscFunctionBegin; 792 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 793 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 794 if (iascii) { 795 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 796 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 797 MatInfo info; 798 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 799 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 800 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 801 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 802 baij->bs,(int)info.memory);CHKERRQ(ierr); 803 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 804 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 805 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 806 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 807 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 808 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } else if (format == PETSC_VIEWER_ASCII_INFO) { 811 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 812 PetscFunctionReturn(0); 813 } 814 } 815 816 if (isdraw) { 817 PetscDraw draw; 818 PetscTruth isnull; 819 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 820 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 821 } 822 823 if (size == 1) { 824 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 825 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 826 } else { 827 /* assemble the entire matrix onto first processor. */ 828 Mat A; 829 Mat_SeqSBAIJ *Aloc; 830 Mat_SeqBAIJ *Bloc; 831 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 832 MatScalar *a; 833 834 /* Should this be the same type as mat? */ 835 if (!rank) { 836 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 837 } else { 838 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 839 } 840 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 841 ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 842 PetscLogObjectParent(mat,A); 843 844 /* copy over the A part */ 845 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 846 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 847 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 848 849 for (i=0; i<mbs; i++) { 850 rvals[0] = bs*(baij->rstart + i); 851 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 852 for (j=ai[i]; j<ai[i+1]; j++) { 853 col = (baij->cstart+aj[j])*bs; 854 for (k=0; k<bs; k++) { 855 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 856 col++; a += bs; 857 } 858 } 859 } 860 /* copy over the B part */ 861 Bloc = (Mat_SeqBAIJ*)baij->B->data; 862 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 863 for (i=0; i<mbs; i++) { 864 rvals[0] = bs*(baij->rstart + i); 865 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 866 for (j=ai[i]; j<ai[i+1]; j++) { 867 col = baij->garray[aj[j]]*bs; 868 for (k=0; k<bs; k++) { 869 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 870 col++; a += bs; 871 } 872 } 873 } 874 ierr = PetscFree(rvals);CHKERRQ(ierr); 875 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 876 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 877 /* 878 Everyone has to call to draw the matrix since the graphics waits are 879 synchronized across all processors that share the PetscDraw object 880 */ 881 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 882 if (!rank) { 883 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 884 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 885 } 886 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 887 ierr = MatDestroy(A);CHKERRQ(ierr); 888 } 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNCT__ 893 #define __FUNCT__ "MatView_MPISBAIJ" 894 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 895 { 896 PetscErrorCode ierr; 897 PetscTruth iascii,isdraw,issocket,isbinary; 898 899 PetscFunctionBegin; 900 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 901 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 902 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 904 if (iascii || isdraw || issocket || isbinary) { 905 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 906 } else { 907 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 908 } 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "MatDestroy_MPISBAIJ" 914 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 915 { 916 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 #if defined(PETSC_USE_LOG) 921 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 922 #endif 923 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 924 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 925 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 926 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 927 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 928 #if defined (PETSC_USE_CTABLE) 929 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 930 #else 931 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 932 #endif 933 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 934 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 935 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 936 if (baij->slvec0) { 937 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 938 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 939 } 940 if (baij->slvec1) { 941 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 942 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 943 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 944 } 945 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 946 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 947 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 948 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 949 #if defined(PETSC_USE_MAT_SINGLE) 950 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 951 #endif 952 ierr = PetscFree(baij);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 #undef __FUNCT__ 957 #define __FUNCT__ "MatMult_MPISBAIJ" 958 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 959 { 960 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 961 PetscErrorCode ierr; 962 int nt,mbs=a->mbs,bs=a->bs; 963 PetscScalar *x,*from,zero=0.0; 964 965 PetscFunctionBegin; 966 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 967 if (nt != A->n) { 968 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 969 } 970 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 971 if (nt != A->m) { 972 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 973 } 974 975 /* diagonal part */ 976 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 977 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 978 979 /* subdiagonal part */ 980 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 981 982 /* copy x into the vec slvec0 */ 983 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 984 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 985 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 986 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 987 988 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 989 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 990 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 991 992 /* supperdiagonal part */ 993 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 994 995 PetscFunctionReturn(0); 996 } 997 998 #undef __FUNCT__ 999 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1000 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1001 { 1002 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1003 PetscErrorCode ierr; 1004 int nt; 1005 1006 PetscFunctionBegin; 1007 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1008 if (nt != A->n) { 1009 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1010 } 1011 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1012 if (nt != A->m) { 1013 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1014 } 1015 1016 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1017 /* do diagonal part */ 1018 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* do supperdiagonal part */ 1020 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1021 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1022 /* do subdiagonal part */ 1023 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1024 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1026 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1032 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1033 { 1034 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1035 PetscErrorCode ierr; 1036 int mbs=a->mbs,bs=a->bs; 1037 PetscScalar *x,*from,zero=0.0; 1038 1039 PetscFunctionBegin; 1040 /* 1041 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1042 PetscSynchronizedFlush(A->comm); 1043 */ 1044 /* diagonal part */ 1045 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1046 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1047 1048 /* subdiagonal part */ 1049 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1050 1051 /* copy x into the vec slvec0 */ 1052 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1053 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1054 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1055 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1056 1057 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1058 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1059 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1060 1061 /* supperdiagonal part */ 1062 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1063 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1069 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1070 { 1071 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1072 PetscErrorCode ierr; 1073 1074 PetscFunctionBegin; 1075 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1076 /* do diagonal part */ 1077 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1078 /* do supperdiagonal part */ 1079 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1080 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1081 1082 /* do subdiagonal part */ 1083 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1084 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1085 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1086 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1092 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1093 { 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1103 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1104 { 1105 PetscErrorCode ierr; 1106 1107 PetscFunctionBegin; 1108 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* 1113 This only works correctly for square matrices where the subblock A->A is the 1114 diagonal block 1115 */ 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1118 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1119 { 1120 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 1123 PetscFunctionBegin; 1124 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1125 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatScale_MPISBAIJ" 1131 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1132 { 1133 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1138 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1144 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1145 { 1146 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1147 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1148 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1149 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1150 int *cmap,*idx_p,cstart = mat->cstart; 1151 1152 PetscFunctionBegin; 1153 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1154 mat->getrowactive = PETSC_TRUE; 1155 1156 if (!mat->rowvalues && (idx || v)) { 1157 /* 1158 allocate enough space to hold information from the longest row. 1159 */ 1160 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1161 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1162 int max = 1,mbs = mat->mbs,tmp; 1163 for (i=0; i<mbs; i++) { 1164 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1165 if (max < tmp) { max = tmp; } 1166 } 1167 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1168 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1169 } 1170 1171 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1172 lrow = row - brstart; /* local row index */ 1173 1174 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1175 if (!v) {pvA = 0; pvB = 0;} 1176 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1177 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1178 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1179 nztot = nzA + nzB; 1180 1181 cmap = mat->garray; 1182 if (v || idx) { 1183 if (nztot) { 1184 /* Sort by increasing column numbers, assuming A and B already sorted */ 1185 int imark = -1; 1186 if (v) { 1187 *v = v_p = mat->rowvalues; 1188 for (i=0; i<nzB; i++) { 1189 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1190 else break; 1191 } 1192 imark = i; 1193 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1194 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1195 } 1196 if (idx) { 1197 *idx = idx_p = mat->rowindices; 1198 if (imark > -1) { 1199 for (i=0; i<imark; i++) { 1200 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1201 } 1202 } else { 1203 for (i=0; i<nzB; i++) { 1204 if (cmap[cworkB[i]/bs] < cstart) 1205 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1206 else break; 1207 } 1208 imark = i; 1209 } 1210 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1211 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1212 } 1213 } else { 1214 if (idx) *idx = 0; 1215 if (v) *v = 0; 1216 } 1217 } 1218 *nz = nztot; 1219 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1220 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1226 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1227 { 1228 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1229 1230 PetscFunctionBegin; 1231 if (baij->getrowactive == PETSC_FALSE) { 1232 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1233 } 1234 baij->getrowactive = PETSC_FALSE; 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1240 PetscErrorCode MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1241 { 1242 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1243 1244 PetscFunctionBegin; 1245 *bs = baij->bs; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1251 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1252 { 1253 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1258 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1264 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1265 { 1266 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1267 Mat A = a->A,B = a->B; 1268 PetscErrorCode ierr; 1269 PetscReal isend[5],irecv[5]; 1270 1271 PetscFunctionBegin; 1272 info->block_size = (PetscReal)a->bs; 1273 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1274 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1275 isend[3] = info->memory; isend[4] = info->mallocs; 1276 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1277 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1278 isend[3] += info->memory; isend[4] += info->mallocs; 1279 if (flag == MAT_LOCAL) { 1280 info->nz_used = isend[0]; 1281 info->nz_allocated = isend[1]; 1282 info->nz_unneeded = isend[2]; 1283 info->memory = isend[3]; 1284 info->mallocs = isend[4]; 1285 } else if (flag == MAT_GLOBAL_MAX) { 1286 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1287 info->nz_used = irecv[0]; 1288 info->nz_allocated = irecv[1]; 1289 info->nz_unneeded = irecv[2]; 1290 info->memory = irecv[3]; 1291 info->mallocs = irecv[4]; 1292 } else if (flag == MAT_GLOBAL_SUM) { 1293 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1294 info->nz_used = irecv[0]; 1295 info->nz_allocated = irecv[1]; 1296 info->nz_unneeded = irecv[2]; 1297 info->memory = irecv[3]; 1298 info->mallocs = irecv[4]; 1299 } else { 1300 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1301 } 1302 info->rows_global = (PetscReal)A->M; 1303 info->columns_global = (PetscReal)A->N; 1304 info->rows_local = (PetscReal)A->m; 1305 info->columns_local = (PetscReal)A->N; 1306 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1307 info->fill_ratio_needed = 0; 1308 info->factor_mallocs = 0; 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1314 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op) 1315 { 1316 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1317 PetscErrorCode ierr; 1318 1319 PetscFunctionBegin; 1320 switch (op) { 1321 case MAT_NO_NEW_NONZERO_LOCATIONS: 1322 case MAT_YES_NEW_NONZERO_LOCATIONS: 1323 case MAT_COLUMNS_UNSORTED: 1324 case MAT_COLUMNS_SORTED: 1325 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1326 case MAT_KEEP_ZEROED_ROWS: 1327 case MAT_NEW_NONZERO_LOCATION_ERR: 1328 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1329 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1330 break; 1331 case MAT_ROW_ORIENTED: 1332 a->roworiented = PETSC_TRUE; 1333 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1334 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1335 break; 1336 case MAT_ROWS_SORTED: 1337 case MAT_ROWS_UNSORTED: 1338 case MAT_YES_NEW_DIAGONALS: 1339 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1340 break; 1341 case MAT_COLUMN_ORIENTED: 1342 a->roworiented = PETSC_FALSE; 1343 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1344 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1345 break; 1346 case MAT_IGNORE_OFF_PROC_ENTRIES: 1347 a->donotstash = PETSC_TRUE; 1348 break; 1349 case MAT_NO_NEW_DIAGONALS: 1350 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1351 case MAT_USE_HASH_TABLE: 1352 a->ht_flag = PETSC_TRUE; 1353 break; 1354 case MAT_NOT_SYMMETRIC: 1355 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1356 case MAT_HERMITIAN: 1357 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1358 case MAT_SYMMETRIC: 1359 case MAT_STRUCTURALLY_SYMMETRIC: 1360 case MAT_NOT_HERMITIAN: 1361 case MAT_SYMMETRY_ETERNAL: 1362 case MAT_NOT_SYMMETRY_ETERNAL: 1363 break; 1364 default: 1365 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1372 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B) 1373 { 1374 PetscErrorCode ierr; 1375 PetscFunctionBegin; 1376 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1382 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1383 { 1384 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1385 Mat a = baij->A,b = baij->B; 1386 PetscErrorCode ierr; 1387 int s1,s2,s3; 1388 1389 PetscFunctionBegin; 1390 if (ll != rr) { 1391 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1392 } 1393 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1394 if (rr) { 1395 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1396 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1397 /* Overlap communication with computation. */ 1398 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1399 /*} if (ll) { */ 1400 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1401 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1402 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1403 /* } */ 1404 /* scale the diagonal block */ 1405 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1406 1407 /* if (rr) { */ 1408 /* Do a scatter end and then right scale the off-diagonal block */ 1409 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1410 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1411 } 1412 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1418 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1419 { 1420 PetscFunctionBegin; 1421 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1426 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A) 1427 { 1428 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1429 MPI_Comm comm = A->comm; 1430 static int called = 0; 1431 PetscErrorCode ierr; 1432 1433 PetscFunctionBegin; 1434 if (!a->rank) { 1435 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1436 } 1437 if (called) {PetscFunctionReturn(0);} else called = 1; 1438 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1439 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1445 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1446 { 1447 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1448 PetscErrorCode ierr; 1449 1450 PetscFunctionBegin; 1451 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1452 PetscFunctionReturn(0); 1453 } 1454 1455 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatEqual_MPISBAIJ" 1459 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1460 { 1461 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1462 Mat a,b,c,d; 1463 PetscTruth flg; 1464 PetscErrorCode ierr; 1465 1466 PetscFunctionBegin; 1467 a = matA->A; b = matA->B; 1468 c = matB->A; d = matB->B; 1469 1470 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1471 if (flg == PETSC_TRUE) { 1472 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1473 } 1474 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1480 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A) 1481 { 1482 PetscErrorCode ierr; 1483 1484 PetscFunctionBegin; 1485 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1491 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1492 { 1493 int i,ierr; 1494 PetscTruth flg; 1495 1496 for (i=0; i<n; i++) { 1497 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1498 if (!flg) { 1499 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1500 } 1501 } 1502 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 1507 /* -------------------------------------------------------------------*/ 1508 static struct _MatOps MatOps_Values = { 1509 MatSetValues_MPISBAIJ, 1510 MatGetRow_MPISBAIJ, 1511 MatRestoreRow_MPISBAIJ, 1512 MatMult_MPISBAIJ, 1513 /* 4*/ MatMultAdd_MPISBAIJ, 1514 MatMultTranspose_MPISBAIJ, 1515 MatMultTransposeAdd_MPISBAIJ, 1516 0, 1517 0, 1518 0, 1519 /*10*/ 0, 1520 0, 1521 0, 1522 MatRelax_MPISBAIJ, 1523 MatTranspose_MPISBAIJ, 1524 /*15*/ MatGetInfo_MPISBAIJ, 1525 MatEqual_MPISBAIJ, 1526 MatGetDiagonal_MPISBAIJ, 1527 MatDiagonalScale_MPISBAIJ, 1528 MatNorm_MPISBAIJ, 1529 /*20*/ MatAssemblyBegin_MPISBAIJ, 1530 MatAssemblyEnd_MPISBAIJ, 1531 0, 1532 MatSetOption_MPISBAIJ, 1533 MatZeroEntries_MPISBAIJ, 1534 /*25*/ MatZeroRows_MPISBAIJ, 1535 0, 1536 0, 1537 0, 1538 0, 1539 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1540 0, 1541 0, 1542 0, 1543 0, 1544 /*35*/ MatDuplicate_MPISBAIJ, 1545 0, 1546 0, 1547 0, 1548 0, 1549 /*40*/ 0, 1550 MatGetSubMatrices_MPISBAIJ, 1551 MatIncreaseOverlap_MPISBAIJ, 1552 MatGetValues_MPISBAIJ, 1553 0, 1554 /*45*/ MatPrintHelp_MPISBAIJ, 1555 MatScale_MPISBAIJ, 1556 0, 1557 0, 1558 0, 1559 /*50*/ MatGetBlockSize_MPISBAIJ, 1560 0, 1561 0, 1562 0, 1563 0, 1564 /*55*/ 0, 1565 0, 1566 MatSetUnfactored_MPISBAIJ, 1567 0, 1568 MatSetValuesBlocked_MPISBAIJ, 1569 /*60*/ 0, 1570 0, 1571 0, 1572 MatGetPetscMaps_Petsc, 1573 0, 1574 /*65*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*70*/ MatGetRowMax_MPISBAIJ, 1580 0, 1581 0, 1582 0, 1583 0, 1584 /*75*/ 0, 1585 0, 1586 0, 1587 0, 1588 0, 1589 /*80*/ 0, 1590 0, 1591 0, 1592 0, 1593 /*85*/ MatLoad_MPISBAIJ 1594 }; 1595 1596 1597 EXTERN_C_BEGIN 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1600 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1601 { 1602 PetscFunctionBegin; 1603 *a = ((Mat_MPISBAIJ *)A->data)->A; 1604 *iscopy = PETSC_FALSE; 1605 PetscFunctionReturn(0); 1606 } 1607 EXTERN_C_END 1608 1609 EXTERN_C_BEGIN 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1612 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1613 { 1614 Mat_MPISBAIJ *b; 1615 PetscErrorCode ierr; 1616 int i,mbs,Mbs; 1617 1618 PetscFunctionBegin; 1619 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1620 1621 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1622 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1623 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1624 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1625 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1626 if (d_nnz) { 1627 for (i=0; i<B->m/bs; i++) { 1628 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1629 } 1630 } 1631 if (o_nnz) { 1632 for (i=0; i<B->m/bs; i++) { 1633 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1634 } 1635 } 1636 B->preallocated = PETSC_TRUE; 1637 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1638 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1639 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1640 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1641 1642 b = (Mat_MPISBAIJ*)B->data; 1643 mbs = B->m/bs; 1644 Mbs = B->M/bs; 1645 if (mbs*bs != B->m) { 1646 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1647 } 1648 1649 b->bs = bs; 1650 b->bs2 = bs*bs; 1651 b->mbs = mbs; 1652 b->nbs = mbs; 1653 b->Mbs = Mbs; 1654 b->Nbs = Mbs; 1655 1656 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1657 b->rowners[0] = 0; 1658 for (i=2; i<=b->size; i++) { 1659 b->rowners[i] += b->rowners[i-1]; 1660 } 1661 b->rstart = b->rowners[b->rank]; 1662 b->rend = b->rowners[b->rank+1]; 1663 b->cstart = b->rstart; 1664 b->cend = b->rend; 1665 for (i=0; i<=b->size; i++) { 1666 b->rowners_bs[i] = b->rowners[i]*bs; 1667 } 1668 b->rstart_bs = b-> rstart*bs; 1669 b->rend_bs = b->rend*bs; 1670 1671 b->cstart_bs = b->cstart*bs; 1672 b->cend_bs = b->cend*bs; 1673 1674 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr); 1675 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1676 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1677 PetscLogObjectParent(B,b->A); 1678 1679 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr); 1680 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1681 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1682 PetscLogObjectParent(B,b->B); 1683 1684 /* build cache for off array entries formed */ 1685 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1686 1687 PetscFunctionReturn(0); 1688 } 1689 EXTERN_C_END 1690 1691 /*MC 1692 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1693 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1694 1695 Options Database Keys: 1696 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1697 1698 Level: beginner 1699 1700 .seealso: MatCreateMPISBAIJ 1701 M*/ 1702 1703 EXTERN_C_BEGIN 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatCreate_MPISBAIJ" 1706 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1707 { 1708 Mat_MPISBAIJ *b; 1709 PetscErrorCode ierr; 1710 PetscTruth flg; 1711 1712 PetscFunctionBegin; 1713 1714 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1715 B->data = (void*)b; 1716 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1717 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1718 1719 B->ops->destroy = MatDestroy_MPISBAIJ; 1720 B->ops->view = MatView_MPISBAIJ; 1721 B->mapping = 0; 1722 B->factor = 0; 1723 B->assembled = PETSC_FALSE; 1724 1725 B->insertmode = NOT_SET_VALUES; 1726 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1727 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1728 1729 /* build local table of row and column ownerships */ 1730 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1731 b->cowners = b->rowners + b->size + 2; 1732 b->rowners_bs = b->cowners + b->size + 2; 1733 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1734 1735 /* build cache for off array entries formed */ 1736 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1737 b->donotstash = PETSC_FALSE; 1738 b->colmap = PETSC_NULL; 1739 b->garray = PETSC_NULL; 1740 b->roworiented = PETSC_TRUE; 1741 1742 #if defined(PETSC_USE_MAT_SINGLE) 1743 /* stuff for MatSetValues_XXX in single precision */ 1744 b->setvalueslen = 0; 1745 b->setvaluescopy = PETSC_NULL; 1746 #endif 1747 1748 /* stuff used in block assembly */ 1749 b->barray = 0; 1750 1751 /* stuff used for matrix vector multiply */ 1752 b->lvec = 0; 1753 b->Mvctx = 0; 1754 b->slvec0 = 0; 1755 b->slvec0b = 0; 1756 b->slvec1 = 0; 1757 b->slvec1a = 0; 1758 b->slvec1b = 0; 1759 b->sMvctx = 0; 1760 1761 /* stuff for MatGetRow() */ 1762 b->rowindices = 0; 1763 b->rowvalues = 0; 1764 b->getrowactive = PETSC_FALSE; 1765 1766 /* hash table stuff */ 1767 b->ht = 0; 1768 b->hd = 0; 1769 b->ht_size = 0; 1770 b->ht_flag = PETSC_FALSE; 1771 b->ht_fact = 0; 1772 b->ht_total_ct = 0; 1773 b->ht_insert_ct = 0; 1774 1775 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1776 if (flg) { 1777 PetscReal fact = 1.39; 1778 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1779 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1780 if (fact <= 1.0) fact = 1.39; 1781 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1782 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1783 } 1784 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1785 "MatStoreValues_MPISBAIJ", 1786 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1787 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1788 "MatRetrieveValues_MPISBAIJ", 1789 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1790 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1791 "MatGetDiagonalBlock_MPISBAIJ", 1792 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1793 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1794 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1795 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1796 B->symmetric = PETSC_TRUE; 1797 B->structurally_symmetric = PETSC_TRUE; 1798 B->symmetric_set = PETSC_TRUE; 1799 B->structurally_symmetric_set = PETSC_TRUE; 1800 PetscFunctionReturn(0); 1801 } 1802 EXTERN_C_END 1803 1804 /*MC 1805 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1806 1807 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1808 and MATMPISBAIJ otherwise. 1809 1810 Options Database Keys: 1811 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1812 1813 Level: beginner 1814 1815 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1816 M*/ 1817 1818 EXTERN_C_BEGIN 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatCreate_SBAIJ" 1821 PetscErrorCode MatCreate_SBAIJ(Mat A) 1822 { 1823 PetscErrorCode ierr,size; 1824 1825 PetscFunctionBegin; 1826 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1827 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1828 if (size == 1) { 1829 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1830 } else { 1831 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835 EXTERN_C_END 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1839 /*@C 1840 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1841 the user should preallocate the matrix storage by setting the parameters 1842 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1843 performance can be increased by more than a factor of 50. 1844 1845 Collective on Mat 1846 1847 Input Parameters: 1848 + A - the matrix 1849 . bs - size of blockk 1850 . d_nz - number of block nonzeros per block row in diagonal portion of local 1851 submatrix (same for all local rows) 1852 . d_nnz - array containing the number of block nonzeros in the various block rows 1853 in the upper triangular and diagonal part of the in diagonal portion of the local 1854 (possibly different for each block row) or PETSC_NULL. You must leave room 1855 for the diagonal entry even if it is zero. 1856 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1857 submatrix (same for all local rows). 1858 - o_nnz - array containing the number of nonzeros in the various block rows of the 1859 off-diagonal portion of the local submatrix (possibly different for 1860 each block row) or PETSC_NULL. 1861 1862 1863 Options Database Keys: 1864 . -mat_no_unroll - uses code that does not unroll the loops in the 1865 block calculations (much slower) 1866 . -mat_block_size - size of the blocks to use 1867 1868 Notes: 1869 1870 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1871 than it must be used on all processors that share the object for that argument. 1872 1873 Storage Information: 1874 For a square global matrix we define each processor's diagonal portion 1875 to be its local rows and the corresponding columns (a square submatrix); 1876 each processor's off-diagonal portion encompasses the remainder of the 1877 local matrix (a rectangular submatrix). 1878 1879 The user can specify preallocated storage for the diagonal part of 1880 the local submatrix with either d_nz or d_nnz (not both). Set 1881 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1882 memory allocation. Likewise, specify preallocated storage for the 1883 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1884 1885 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1886 the figure below we depict these three local rows and all columns (0-11). 1887 1888 .vb 1889 0 1 2 3 4 5 6 7 8 9 10 11 1890 ------------------- 1891 row 3 | o o o d d d o o o o o o 1892 row 4 | o o o d d d o o o o o o 1893 row 5 | o o o d d d o o o o o o 1894 ------------------- 1895 .ve 1896 1897 Thus, any entries in the d locations are stored in the d (diagonal) 1898 submatrix, and any entries in the o locations are stored in the 1899 o (off-diagonal) submatrix. Note that the d matrix is stored in 1900 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1901 1902 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1903 plus the diagonal part of the d matrix, 1904 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1905 In general, for PDE problems in which most nonzeros are near the diagonal, 1906 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1907 or you will get TERRIBLE performance; see the users' manual chapter on 1908 matrices. 1909 1910 Level: intermediate 1911 1912 .keywords: matrix, block, aij, compressed row, sparse, parallel 1913 1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1915 @*/ 1916 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1917 { 1918 PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1919 1920 PetscFunctionBegin; 1921 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1922 if (f) { 1923 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatCreateMPISBAIJ" 1930 /*@C 1931 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1932 (block compressed row). For good matrix assembly performance 1933 the user should preallocate the matrix storage by setting the parameters 1934 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1935 performance can be increased by more than a factor of 50. 1936 1937 Collective on MPI_Comm 1938 1939 Input Parameters: 1940 + comm - MPI communicator 1941 . bs - size of blockk 1942 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1943 This value should be the same as the local size used in creating the 1944 y vector for the matrix-vector product y = Ax. 1945 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1946 This value should be the same as the local size used in creating the 1947 x vector for the matrix-vector product y = Ax. 1948 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1949 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1950 . d_nz - number of block nonzeros per block row in diagonal portion of local 1951 submatrix (same for all local rows) 1952 . d_nnz - array containing the number of block nonzeros in the various block rows 1953 in the upper triangular portion of the in diagonal portion of the local 1954 (possibly different for each block block row) or PETSC_NULL. 1955 You must leave room for the diagonal entry even if it is zero. 1956 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1957 submatrix (same for all local rows). 1958 - o_nnz - array containing the number of nonzeros in the various block rows of the 1959 off-diagonal portion of the local submatrix (possibly different for 1960 each block row) or PETSC_NULL. 1961 1962 Output Parameter: 1963 . A - the matrix 1964 1965 Options Database Keys: 1966 . -mat_no_unroll - uses code that does not unroll the loops in the 1967 block calculations (much slower) 1968 . -mat_block_size - size of the blocks to use 1969 . -mat_mpi - use the parallel matrix data structures even on one processor 1970 (defaults to using SeqBAIJ format on one processor) 1971 1972 Notes: 1973 The user MUST specify either the local or global matrix dimensions 1974 (possibly both). 1975 1976 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1977 than it must be used on all processors that share the object for that argument. 1978 1979 Storage Information: 1980 For a square global matrix we define each processor's diagonal portion 1981 to be its local rows and the corresponding columns (a square submatrix); 1982 each processor's off-diagonal portion encompasses the remainder of the 1983 local matrix (a rectangular submatrix). 1984 1985 The user can specify preallocated storage for the diagonal part of 1986 the local submatrix with either d_nz or d_nnz (not both). Set 1987 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1988 memory allocation. Likewise, specify preallocated storage for the 1989 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1990 1991 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1992 the figure below we depict these three local rows and all columns (0-11). 1993 1994 .vb 1995 0 1 2 3 4 5 6 7 8 9 10 11 1996 ------------------- 1997 row 3 | o o o d d d o o o o o o 1998 row 4 | o o o d d d o o o o o o 1999 row 5 | o o o d d d o o o o o o 2000 ------------------- 2001 .ve 2002 2003 Thus, any entries in the d locations are stored in the d (diagonal) 2004 submatrix, and any entries in the o locations are stored in the 2005 o (off-diagonal) submatrix. Note that the d matrix is stored in 2006 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2007 2008 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2009 plus the diagonal part of the d matrix, 2010 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2011 In general, for PDE problems in which most nonzeros are near the diagonal, 2012 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2013 or you will get TERRIBLE performance; see the users' manual chapter on 2014 matrices. 2015 2016 Level: intermediate 2017 2018 .keywords: matrix, block, aij, compressed row, sparse, parallel 2019 2020 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2021 @*/ 2022 2023 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2024 { 2025 PetscErrorCode ierr,size; 2026 2027 PetscFunctionBegin; 2028 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2029 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2030 if (size > 1) { 2031 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2032 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2033 } else { 2034 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2035 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2036 } 2037 PetscFunctionReturn(0); 2038 } 2039 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2043 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2044 { 2045 Mat mat; 2046 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2047 PetscErrorCode ierr; 2048 int len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs; 2049 PetscScalar *array; 2050 2051 PetscFunctionBegin; 2052 *newmat = 0; 2053 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2054 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2055 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2056 2057 mat->factor = matin->factor; 2058 mat->preallocated = PETSC_TRUE; 2059 mat->assembled = PETSC_TRUE; 2060 mat->insertmode = NOT_SET_VALUES; 2061 2062 a = (Mat_MPISBAIJ*)mat->data; 2063 a->bs = oldmat->bs; 2064 a->bs2 = oldmat->bs2; 2065 a->mbs = oldmat->mbs; 2066 a->nbs = oldmat->nbs; 2067 a->Mbs = oldmat->Mbs; 2068 a->Nbs = oldmat->Nbs; 2069 2070 a->rstart = oldmat->rstart; 2071 a->rend = oldmat->rend; 2072 a->cstart = oldmat->cstart; 2073 a->cend = oldmat->cend; 2074 a->size = oldmat->size; 2075 a->rank = oldmat->rank; 2076 a->donotstash = oldmat->donotstash; 2077 a->roworiented = oldmat->roworiented; 2078 a->rowindices = 0; 2079 a->rowvalues = 0; 2080 a->getrowactive = PETSC_FALSE; 2081 a->barray = 0; 2082 a->rstart_bs = oldmat->rstart_bs; 2083 a->rend_bs = oldmat->rend_bs; 2084 a->cstart_bs = oldmat->cstart_bs; 2085 a->cend_bs = oldmat->cend_bs; 2086 2087 /* hash table stuff */ 2088 a->ht = 0; 2089 a->hd = 0; 2090 a->ht_size = 0; 2091 a->ht_flag = oldmat->ht_flag; 2092 a->ht_fact = oldmat->ht_fact; 2093 a->ht_total_ct = 0; 2094 a->ht_insert_ct = 0; 2095 2096 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2097 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2098 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2099 if (oldmat->colmap) { 2100 #if defined (PETSC_USE_CTABLE) 2101 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2102 #else 2103 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2104 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2105 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2106 #endif 2107 } else a->colmap = 0; 2108 2109 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2110 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2111 PetscLogObjectMemory(mat,len*sizeof(int)); 2112 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2113 } else a->garray = 0; 2114 2115 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2116 PetscLogObjectParent(mat,a->lvec); 2117 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2118 PetscLogObjectParent(mat,a->Mvctx); 2119 2120 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2121 PetscLogObjectParent(mat,a->slvec0); 2122 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2123 PetscLogObjectParent(mat,a->slvec1); 2124 2125 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2126 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2127 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2128 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2129 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2130 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2131 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2132 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2133 PetscLogObjectParent(mat,a->slvec0); 2134 PetscLogObjectParent(mat,a->slvec1); 2135 PetscLogObjectParent(mat,a->slvec0b); 2136 PetscLogObjectParent(mat,a->slvec1a); 2137 PetscLogObjectParent(mat,a->slvec1b); 2138 2139 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2140 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2141 a->sMvctx = oldmat->sMvctx; 2142 PetscLogObjectParent(mat,a->sMvctx); 2143 2144 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2145 PetscLogObjectParent(mat,a->A); 2146 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2147 PetscLogObjectParent(mat,a->B); 2148 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2149 *newmat = mat; 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #include "petscsys.h" 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatLoad_MPISBAIJ" 2157 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2158 { 2159 Mat A; 2160 int i,nz,ierr,j,rstart,rend,fd; 2161 PetscScalar *vals,*buf; 2162 MPI_Comm comm = ((PetscObject)viewer)->comm; 2163 MPI_Status status; 2164 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2165 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2166 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2167 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2168 int dcount,kmax,k,nzcount,tmp; 2169 2170 PetscFunctionBegin; 2171 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2172 2173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2174 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2175 if (!rank) { 2176 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2177 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2178 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2179 if (header[3] < 0) { 2180 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2181 } 2182 } 2183 2184 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2185 M = header[1]; N = header[2]; 2186 2187 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2188 2189 /* 2190 This code adds extra rows to make sure the number of rows is 2191 divisible by the blocksize 2192 */ 2193 Mbs = M/bs; 2194 extra_rows = bs - M + bs*(Mbs); 2195 if (extra_rows == bs) extra_rows = 0; 2196 else Mbs++; 2197 if (extra_rows &&!rank) { 2198 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2199 } 2200 2201 /* determine ownership of all rows */ 2202 mbs = Mbs/size + ((Mbs % size) > rank); 2203 m = mbs*bs; 2204 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2205 browners = rowners + size + 1; 2206 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2207 rowners[0] = 0; 2208 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2209 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2210 rstart = rowners[rank]; 2211 rend = rowners[rank+1]; 2212 2213 /* distribute row lengths to all processors */ 2214 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2215 if (!rank) { 2216 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2217 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2218 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2219 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2220 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2221 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2222 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2223 } else { 2224 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2225 } 2226 2227 if (!rank) { /* procs[0] */ 2228 /* calculate the number of nonzeros on each processor */ 2229 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2230 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2231 for (i=0; i<size; i++) { 2232 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2233 procsnz[i] += rowlengths[j]; 2234 } 2235 } 2236 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2237 2238 /* determine max buffer needed and allocate it */ 2239 maxnz = 0; 2240 for (i=0; i<size; i++) { 2241 maxnz = PetscMax(maxnz,procsnz[i]); 2242 } 2243 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2244 2245 /* read in my part of the matrix column indices */ 2246 nz = procsnz[0]; 2247 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2248 mycols = ibuf; 2249 if (size == 1) nz -= extra_rows; 2250 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2251 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2252 2253 /* read in every ones (except the last) and ship off */ 2254 for (i=1; i<size-1; i++) { 2255 nz = procsnz[i]; 2256 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2257 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2258 } 2259 /* read in the stuff for the last proc */ 2260 if (size != 1) { 2261 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2262 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2263 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2264 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2265 } 2266 ierr = PetscFree(cols);CHKERRQ(ierr); 2267 } else { /* procs[i], i>0 */ 2268 /* determine buffer space needed for message */ 2269 nz = 0; 2270 for (i=0; i<m; i++) { 2271 nz += locrowlens[i]; 2272 } 2273 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2274 mycols = ibuf; 2275 /* receive message of column indices*/ 2276 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2277 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2278 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2279 } 2280 2281 /* loop over local rows, determining number of off diagonal entries */ 2282 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2283 odlens = dlens + (rend-rstart); 2284 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2285 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2286 masked1 = mask + Mbs; 2287 masked2 = masked1 + Mbs; 2288 rowcount = 0; nzcount = 0; 2289 for (i=0; i<mbs; i++) { 2290 dcount = 0; 2291 odcount = 0; 2292 for (j=0; j<bs; j++) { 2293 kmax = locrowlens[rowcount]; 2294 for (k=0; k<kmax; k++) { 2295 tmp = mycols[nzcount++]/bs; /* block col. index */ 2296 if (!mask[tmp]) { 2297 mask[tmp] = 1; 2298 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2299 else masked1[dcount++] = tmp; /* entry in diag portion */ 2300 } 2301 } 2302 rowcount++; 2303 } 2304 2305 dlens[i] = dcount; /* d_nzz[i] */ 2306 odlens[i] = odcount; /* o_nzz[i] */ 2307 2308 /* zero out the mask elements we set */ 2309 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2310 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2311 } 2312 2313 /* create our matrix */ 2314 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2315 ierr = MatSetType(A,type);CHKERRQ(ierr); 2316 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2317 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2318 2319 if (!rank) { 2320 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2321 /* read in my part of the matrix numerical values */ 2322 nz = procsnz[0]; 2323 vals = buf; 2324 mycols = ibuf; 2325 if (size == 1) nz -= extra_rows; 2326 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2327 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2328 2329 /* insert into matrix */ 2330 jj = rstart*bs; 2331 for (i=0; i<m; i++) { 2332 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2333 mycols += locrowlens[i]; 2334 vals += locrowlens[i]; 2335 jj++; 2336 } 2337 2338 /* read in other processors (except the last one) and ship out */ 2339 for (i=1; i<size-1; i++) { 2340 nz = procsnz[i]; 2341 vals = buf; 2342 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2343 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2344 } 2345 /* the last proc */ 2346 if (size != 1){ 2347 nz = procsnz[i] - extra_rows; 2348 vals = buf; 2349 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2350 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2351 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2352 } 2353 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2354 2355 } else { 2356 /* receive numeric values */ 2357 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2358 2359 /* receive message of values*/ 2360 vals = buf; 2361 mycols = ibuf; 2362 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2363 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2364 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2365 2366 /* insert into matrix */ 2367 jj = rstart*bs; 2368 for (i=0; i<m; i++) { 2369 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2370 mycols += locrowlens[i]; 2371 vals += locrowlens[i]; 2372 jj++; 2373 } 2374 } 2375 2376 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2377 ierr = PetscFree(buf);CHKERRQ(ierr); 2378 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2379 ierr = PetscFree(rowners);CHKERRQ(ierr); 2380 ierr = PetscFree(dlens);CHKERRQ(ierr); 2381 ierr = PetscFree(mask);CHKERRQ(ierr); 2382 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2383 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2384 *newmat = A; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2390 /*@ 2391 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2392 2393 Input Parameters: 2394 . mat - the matrix 2395 . fact - factor 2396 2397 Collective on Mat 2398 2399 Level: advanced 2400 2401 Notes: 2402 This can also be set by the command line option: -mat_use_hash_table fact 2403 2404 .keywords: matrix, hashtable, factor, HT 2405 2406 .seealso: MatSetOption() 2407 @*/ 2408 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2409 { 2410 PetscFunctionBegin; 2411 SETERRQ(1,"Function not yet written for SBAIJ format"); 2412 /* PetscFunctionReturn(0); */ 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2417 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2418 { 2419 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2420 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2421 PetscReal atmp; 2422 PetscReal *work,*svalues,*rvalues; 2423 PetscErrorCode ierr; 2424 int i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2425 int rank,size,*rowners_bs,dest,count,source; 2426 PetscScalar *va; 2427 MatScalar *ba; 2428 MPI_Status stat; 2429 2430 PetscFunctionBegin; 2431 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2432 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2433 2434 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2435 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2436 2437 bs = a->bs; 2438 mbs = a->mbs; 2439 Mbs = a->Mbs; 2440 ba = b->a; 2441 bi = b->i; 2442 bj = b->j; 2443 /* 2444 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2445 PetscSynchronizedFlush(A->comm); 2446 */ 2447 2448 /* find ownerships */ 2449 rowners_bs = a->rowners_bs; 2450 /* 2451 if (!rank){ 2452 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2453 } 2454 */ 2455 2456 /* each proc creates an array to be distributed */ 2457 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2458 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2459 2460 /* row_max for B */ 2461 if (rank != size-1){ 2462 for (i=0; i<mbs; i++) { 2463 ncols = bi[1] - bi[0]; bi++; 2464 brow = bs*i; 2465 for (j=0; j<ncols; j++){ 2466 bcol = bs*(*bj); 2467 for (kcol=0; kcol<bs; kcol++){ 2468 col = bcol + kcol; /* local col index */ 2469 col += rowners_bs[rank+1]; /* global col index */ 2470 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2471 for (krow=0; krow<bs; krow++){ 2472 atmp = PetscAbsScalar(*ba); ba++; 2473 row = brow + krow; /* local row index */ 2474 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2475 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2476 if (work[col] < atmp) work[col] = atmp; 2477 } 2478 } 2479 bj++; 2480 } 2481 } 2482 /* 2483 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2484 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2485 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2486 */ 2487 2488 /* send values to its owners */ 2489 for (dest=rank+1; dest<size; dest++){ 2490 svalues = work + rowners_bs[dest]; 2491 count = rowners_bs[dest+1]-rowners_bs[dest]; 2492 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2493 /* 2494 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2495 PetscSynchronizedFlush(A->comm); 2496 */ 2497 } 2498 } 2499 2500 /* receive values */ 2501 if (rank){ 2502 rvalues = work; 2503 count = rowners_bs[rank+1]-rowners_bs[rank]; 2504 for (source=0; source<rank; source++){ 2505 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2506 /* process values */ 2507 for (i=0; i<count; i++){ 2508 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2509 } 2510 /* 2511 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2512 PetscSynchronizedFlush(A->comm); 2513 */ 2514 } 2515 } 2516 2517 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2518 ierr = PetscFree(work);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatRelax_MPISBAIJ" 2524 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2525 { 2526 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2527 PetscErrorCode ierr; 2528 int mbs=mat->mbs,bs=mat->bs; 2529 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2530 Vec bb1; 2531 2532 PetscFunctionBegin; 2533 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2534 if (bs > 1) 2535 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2536 2537 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2538 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2539 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2540 its--; 2541 } 2542 2543 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2544 while (its--){ 2545 2546 /* lower triangular part: slvec0b = - B^T*xx */ 2547 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2548 2549 /* copy xx into slvec0a */ 2550 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2551 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2552 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2553 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2554 2555 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2556 2557 /* copy bb into slvec1a */ 2558 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2559 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2560 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2561 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2562 2563 /* set slvec1b = 0 */ 2564 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2565 2566 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2568 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2569 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2570 2571 /* upper triangular part: bb1 = bb1 - B*x */ 2572 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2573 2574 /* local diagonal sweep */ 2575 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2576 } 2577 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2578 } else { 2579 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2580 } 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2586 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2587 { 2588 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2589 PetscErrorCode ierr; 2590 PetscScalar mone=-1.0; 2591 Vec lvec1,bb1; 2592 2593 PetscFunctionBegin; 2594 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2595 if (mat->bs > 1) 2596 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2597 2598 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2599 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2600 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2601 its--; 2602 } 2603 2604 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2605 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2606 while (its--){ 2607 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2608 2609 /* lower diagonal part: bb1 = bb - B^T*xx */ 2610 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2611 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2612 2613 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2614 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2615 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2616 2617 /* upper diagonal part: bb1 = bb1 - B*x */ 2618 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2619 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2620 2621 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2622 2623 /* diagonal sweep */ 2624 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2625 } 2626 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2627 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2628 } else { 2629 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2630 } 2631 PetscFunctionReturn(0); 2632 } 2633 2634