/*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

extern int MatSetUpMultiply_MPISBAIJ(Mat);
extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern int DisAssemble_MPISBAIJ(Mat);
extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatPrintHelp_SeqSBAIJ(Mat);
extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist; the ..._MatScalar names are simply #defined to the regular routines below.
   Otherwise the ..._MatScalar() routines take matrix elements that are already in single
   precision (MatScalar) and insert them directly into the single precision data structures,
   while MatSetValuesBlocked_MPISBAIJ() first converts the PetscScalar entries to single
   precision and then calls ..._MatScalar() to store them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar     MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar            MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar     MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar         MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar  MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#define CHUNKSIZE 10

#define
MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 78 { \ 79 \ 80 brow = row/bs; \ 81 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 82 rmax = aimax[brow]; nrow = ailen[brow]; \ 83 bcol = col/bs; \ 84 ridx = row % bs; cidx = col % bs; \ 85 low = 0; high = nrow; \ 86 while (high-low > 3) { \ 87 t = (low+high)/2; \ 88 if (rp[t] > bcol) high = t; \ 89 else low = t; \ 90 } \ 91 for (_i=low; _i<high; _i++) { \ 92 if (rp[_i] > bcol) break; \ 93 if (rp[_i] == bcol) { \ 94 bap = ap + bs2*_i + bs*cidx + ridx; \ 95 if (addv == ADD_VALUES) *bap += value; \ 96 else *bap = value; \ 97 goto a_noinsert; \ 98 } \ 99 } \ 100 if (a->nonew == 1) goto a_noinsert; \ 101 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 102 if (nrow >= rmax) { \ 103 /* there is no extra room in row, therefore enlarge */ \ 104 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 105 MatScalar *new_a; \ 106 \ 107 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 108 \ 109 /* malloc new storage space */ \ 110 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 111 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 112 new_j = (int*)(new_a + bs2*new_nz); \ 113 new_i = new_j + new_nz; \ 114 \ 115 /* copy over old data into new slots */ \ 116 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 117 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 118 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 119 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 120 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 121 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 124 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 125 /* free up old matrix storage */ \ 126 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 127 if (!a->singlemalloc) { \ 128 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 129 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 130 } \ 131 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 132 a->singlemalloc = PETSC_TRUE; \ 133 \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 136 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 137 a->maxnz += bs2*CHUNKSIZE; \ 138 a->reallocs++; \ 139 a->nz++; \ 140 } \ 141 N = nrow++ - 1; \ 142 /* shift up all the later entries in this row */ \ 143 for (ii=N; ii>=_i; ii--) { \ 144 rp[ii+1] = rp[ii]; \ 145 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 146 } \ 147 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 148 rp[_i] = bcol; \ 149 ap[bs2*_i + bs*cidx + ridx] = value; \ 150 a_noinsert:; \ 151 ailen[brow] = nrow; \ 152 } 153 #ifndef MatSetValues_SeqBAIJ_B_Private 154 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 155 { \ 156 brow = row/bs; \ 157 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 158 rmax = bimax[brow]; nrow = bilen[brow]; \ 159 bcol = col/bs; \ 160 ridx = row % bs; cidx = col % bs; \ 161 low = 0; high = nrow; \ 162 while (high-low > 3) { \ 163 t = (low+high)/2; \ 164 if (rp[t] > bcol) high = t; \ 165 else low = t; \ 166 } \ 167 for (_i=low; _i<high; _i++) { \ 168 if 
(rp[_i] > bcol) break; \ 169 if (rp[_i] == bcol) { \ 170 bap = ap + bs2*_i + bs*cidx + ridx; \ 171 if (addv == ADD_VALUES) *bap += value; \ 172 else *bap = value; \ 173 goto b_noinsert; \ 174 } \ 175 } \ 176 if (b->nonew == 1) goto b_noinsert; \ 177 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 178 if (nrow >= rmax) { \ 179 /* there is no extra room in row, therefore enlarge */ \ 180 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 181 MatScalar *new_a; \ 182 \ 183 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 184 \ 185 /* malloc new storage space */ \ 186 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 187 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 188 new_j = (int*)(new_a + bs2*new_nz); \ 189 new_i = new_j + new_nz; \ 190 \ 191 /* copy over old data into new slots */ \ 192 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 193 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 194 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 195 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 196 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 197 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 200 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 201 /* free up old matrix storage */ \ 202 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 203 if (!b->singlemalloc) { \ 204 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 205 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 206 } \ 207 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 208 b->singlemalloc = PETSC_TRUE; \ 209 \ 210 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 211 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 212 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 213 b->maxnz += bs2*CHUNKSIZE; \ 214 b->reallocs++; \ 215 b->nz++; \ 216 } \ 217 N = nrow++ - 1; \ 218 /* shift up all the later entries in this row */ \ 219 for (ii=N; ii>=_i; ii--) { \ 220 rp[ii+1] = rp[ii]; \ 221 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 222 } \ 223 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 224 rp[_i] = bcol; \ 225 ap[bs2*_i + bs*cidx + ridx] = value; \ 226 b_noinsert:; \ 227 bilen[brow] = nrow; \ 228 } 229 #endif 230 231 #if defined(PETSC_USE_MAT_SINGLE) 232 #undef __FUNCT__ 233 #define __FUNCT__ "MatSetValues_MPISBAIJ" 234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 235 { 236 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 237 int ierr,i,N = m*n; 238 MatScalar *vsingle; 239 240 PetscFunctionBegin; 241 if (N > b->setvalueslen) { 242 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 243 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 244 b->setvalueslen = N; 245 } 246 vsingle = b->setvaluescopy; 247 248 for (i=0; i<N; i++) { 249 vsingle[i] = v[i]; 250 } 251 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 257 int 
MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscTruth   roworiented = baij->roworiented;
  int          ierr,i,j,row,col;
  int          rstart_orig=baij->rstart_bs;
  int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int          cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
  int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar    *ba=b->a;

  int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int          low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar    *ap,*bap;

  /* for stash */
  int          n_loc, *in_loc=0;
  MatScalar    *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;           /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower
triangular blocks of A */ 358 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 359 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 360 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 361 } else if (in[j] < 0) continue; 362 #if defined(PETSC_USE_BOPT_g) 363 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 364 #endif 365 else { /* off-diag entry (B) */ 366 if (mat->was_assembled) { 367 if (!baij->colmap) { 368 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 369 } 370 #if defined (PETSC_USE_CTABLE) 371 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 372 col = col - 1; 373 #else 374 col = baij->colmap[in[j]/bs] - 1; 375 #endif 376 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 377 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 378 col = in[j]; 379 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 380 B = baij->B; 381 b = (Mat_SeqBAIJ*)(B)->data; 382 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 383 ba=b->a; 384 } else col += in[j]%bs; 385 } else col = in[j]; 386 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 387 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 388 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 389 } 390 } 391 } else { /* off processor entry */ 392 if (!baij->donotstash) { 393 n_loc = 0; 394 for (j=0; j<n; j++){ 395 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 396 in_loc[n_loc] = in[j]; 397 if (roworiented) { 398 v_loc[n_loc] = v[i*n+j]; 399 } else { 400 v_loc[n_loc] = v[j*m+i]; 401 } 402 n_loc++; 403 } 404 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 405 } 406 } 407 } 408 409 if(!baij->donotstash){ 410 ierr = PetscFree(in_loc);CHKERRQ(ierr); 411 ierr = PetscFree(v_loc);CHKERRQ(ierr); 412 } 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 419 { 420 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 421 const MatScalar *value; 422 MatScalar *barray=baij->barray; 423 PetscTruth roworiented = baij->roworiented; 424 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 425 int rend=baij->rend,cstart=baij->cstart,stepval; 426 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 427 428 PetscFunctionBegin; 429 if(!barray) { 430 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 431 baij->barray = barray; 432 } 433 434 if (roworiented) { 435 stepval = (n-1)*bs; 436 } else { 437 stepval = (m-1)*bs; 438 } 439 for (i=0; i<m; i++) { 440 if (im[i] < 0) continue; 441 #if defined(PETSC_USE_BOPT_g) 442 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 443 #endif 444 if (im[i] >= rstart && im[i] < rend) { 445 row = im[i] - rstart; 446 for (j=0; j<n; j++) { 447 /* If NumCol = 1 then a copy is not required */ 448 if ((roworiented) && (n == 1)) { 449 barray = (MatScalar*) v + i*bs2; 450 } else if((!roworiented) && (m == 1)) { 451 barray = (MatScalar*) v + j*bs2; 452 } else { /* Here a copy is required */ 453 if (roworiented) { 454 value = v + i*(stepval+bs)*bs + j*bs; 455 } else { 456 value = v + j*(stepval+bs)*bs + i*bs; 457 } 458 for (ii=0; ii<bs; ii++,value+=stepval) { 459 for (jj=0; jj<bs; jj++) { 460 *barray++ = *value++; 461 } 462 } 
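          /* Descriptive note (added): at this point the bs x bs block supplied by
             the caller (laid out row- or column-oriented in v, as selected by
             roworiented/stepval above) has been gathered into the contiguous
             scratch buffer barray; it is rewound to the start of the block just
             below before being handed to the sequential SetValuesBlocked routine. */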
463 barray -=bs2; 464 } 465 466 if (in[j] >= cstart && in[j] < cend){ 467 col = in[j] - cstart; 468 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 469 } 470 else if (in[j] < 0) continue; 471 #if defined(PETSC_USE_BOPT_g) 472 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 473 #endif 474 else { 475 if (mat->was_assembled) { 476 if (!baij->colmap) { 477 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 478 } 479 480 #if defined(PETSC_USE_BOPT_g) 481 #if defined (PETSC_USE_CTABLE) 482 { int data; 483 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 484 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 485 } 486 #else 487 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 488 #endif 489 #endif 490 #if defined (PETSC_USE_CTABLE) 491 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 492 col = (col - 1)/bs; 493 #else 494 col = (baij->colmap[in[j]] - 1)/bs; 495 #endif 496 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 497 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 498 col = in[j]; 499 } 500 } 501 else col = in[j]; 502 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 503 } 504 } 505 } else { 506 if (!baij->donotstash) { 507 if (roworiented) { 508 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 509 } else { 510 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 511 } 512 } 513 } 514 } 515 PetscFunctionReturn(0); 516 } 517 518 #define HASH_KEY 0.6180339887 519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 525 { 526 PetscFunctionBegin; 527 SETERRQ(1,"Function not yet written for SBAIJ format"); 528 /* PetscFunctionReturn(0); */ 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 534 { 535 PetscFunctionBegin; 536 SETERRQ(1,"Function not yet written for SBAIJ format"); 537 /* PetscFunctionReturn(0); */ 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatGetValues_MPISBAIJ" 542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 543 { 544 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 545 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 546 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 547 548 PetscFunctionBegin; 549 for (i=0; i<m; i++) { 550 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 551 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 552 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 553 row = idxm[i] - bsrstart; 554 for (j=0; j<n; j++) { 555 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 556 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max 
%d",idxn[j],mat->N-1); 557 if (idxn[j] >= bscstart && idxn[j] < bscend){ 558 col = idxn[j] - bscstart; 559 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 560 } else { 561 if (!baij->colmap) { 562 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 563 } 564 #if defined (PETSC_USE_CTABLE) 565 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 566 data --; 567 #else 568 data = baij->colmap[idxn[j]/bs]-1; 569 #endif 570 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 571 else { 572 col = data + idxn[j]%bs; 573 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 574 } 575 } 576 } 577 } else { 578 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 579 } 580 } 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatNorm_MPISBAIJ" 586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 587 { 588 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 589 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 590 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 591 int ierr; 592 PetscReal sum[2],*lnorm2; 593 594 PetscFunctionBegin; 595 if (baij->size == 1) { 596 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 597 } else { 598 if (type == NORM_FROBENIUS) { 599 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 600 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 601 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 602 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 603 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 604 /* 605 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 606 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 607 */ 608 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 609 /* 610 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 611 PetscSynchronizedFlush(mat->comm); */ 612 613 *norm = sqrt(sum[0] + 2*sum[1]); 614 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 615 } else { 616 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 /* 623 Creates the hash table, and sets the table 624 This table is created only once. 625 If new entried need to be added to the matrix 626 then the hash table has to be destroyed and 627 recreated. 
/*
    Creates the hash table, and sets the table
    This table is created only once.
    If new entries need to be added to the matrix
    then the hash table has to be destroyed and
    recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
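/*
   Illustrative (hedged) sketch, added for orientation, of how the assembly
   routines above and below are driven from user code; the matrix and the
   indices are made up for the example.  Only entries in the upper triangle
   (by block) need to be set -- lower-triangular input is silently ignored by
   MatSetValues_MPISBAIJ_MatScalar() earlier in this file -- and off-process
   entries are stashed in MatAssemblyBegin() and communicated in MatAssemblyEnd():

     Mat         mat;             (assumed already created with type MATMPISBAIJ)
     int         ierr,i = 0,j = 5;   (global indices with i <= j)
     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/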
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
  int          i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int          *row,*col,other_disassembled;
  PetscTruth   r1,r2,r3;
  MatScalar    *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
       PetscSynchronizedFlush(mat->comm);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local
rows %d nz %d nz alloced %d bs %d mem %d\n", 793 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 794 baij->bs,(int)info.memory);CHKERRQ(ierr); 795 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 796 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 797 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 798 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 799 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 800 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } else if (format == PETSC_VIEWER_ASCII_INFO) { 803 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 804 PetscFunctionReturn(0); 805 } 806 } 807 808 if (isdraw) { 809 PetscDraw draw; 810 PetscTruth isnull; 811 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 812 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 813 } 814 815 if (size == 1) { 816 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 817 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 818 } else { 819 /* assemble the entire matrix onto first processor. */ 820 Mat A; 821 Mat_SeqSBAIJ *Aloc; 822 Mat_SeqBAIJ *Bloc; 823 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 824 MatScalar *a; 825 826 if (!rank) { 827 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 828 } else { 829 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 830 } 831 PetscLogObjectParent(mat,A); 832 833 /* copy over the A part */ 834 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 835 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 836 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 837 838 for (i=0; i<mbs; i++) { 839 rvals[0] = bs*(baij->rstart + i); 840 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 841 for (j=ai[i]; j<ai[i+1]; j++) { 842 col = (baij->cstart+aj[j])*bs; 843 for (k=0; k<bs; k++) { 844 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 845 col++; a += bs; 846 } 847 } 848 } 849 /* copy over the B part */ 850 Bloc = (Mat_SeqBAIJ*)baij->B->data; 851 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 852 for (i=0; i<mbs; i++) { 853 rvals[0] = bs*(baij->rstart + i); 854 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 855 for (j=ai[i]; j<ai[i+1]; j++) { 856 col = baij->garray[aj[j]]*bs; 857 for (k=0; k<bs; k++) { 858 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 859 col++; a += bs; 860 } 861 } 862 } 863 ierr = PetscFree(rvals);CHKERRQ(ierr); 864 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 865 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 /* 867 Everyone has to call to draw the matrix since the graphics waits are 868 synchronized across all processors that share the PetscDraw object 869 */ 870 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 871 if (!rank) { 872 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 873 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 874 } 875 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 876 ierr = MatDestroy(A);CHKERRQ(ierr); 877 } 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatView_MPISBAIJ" 883 int 
MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
   PetscSynchronizedFlush(A->comm);
  */
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
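/*
   Outline (added) of the single-communication product y = M*x implemented above.
   Each process stores the diagonal block A (upper triangle, SeqSBAIJ) and the
   off-diagonal block B (SeqBAIJ) of the symmetric matrix M.  Roughly:

     slvec1a = A*x_local              local symmetric (diagonal-block) part
     slvec1b = 0
     slvec0b = B^T*x_local            contributions owed to other processes
     slvec0  = [x_local | slvec0b]
     one VecScatter on sMvctx (ADD_VALUES) then simultaneously delivers the
     B^T*x contributions to the processes that own those rows and gathers the
     ghost entries of x into slvec1b
     y       = B*slvec1b + slvec1a    superdiagonal part plus everything above

   MatMult_MPISBAIJ_2comm() below is the older variant that uses the BAIJ-style
   scatter Mvctx twice (forward for the ghost values of x, reverse to return the
   B^T*x contributions) instead of the single combined scatter.
*/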
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
   PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr =
VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1066 /* do diagonal part */ 1067 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1068 /* do supperdiagonal part */ 1069 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1070 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1071 1072 /* do subdiagonal part */ 1073 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1074 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1075 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1076 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1082 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1083 { 1084 int ierr; 1085 1086 PetscFunctionBegin; 1087 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1093 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1094 { 1095 int ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 /* 1103 This only works correctly for square matrices where the subblock A->A is the 1104 diagonal block 1105 */ 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1108 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1109 { 1110 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1111 int ierr; 1112 1113 PetscFunctionBegin; 1114 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1115 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatScale_MPISBAIJ" 1121 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1122 { 1123 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1124 int ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1128 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1134 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1135 { 1136 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1137 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1138 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1139 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1140 int *cmap,*idx_p,cstart = mat->cstart; 1141 1142 PetscFunctionBegin; 1143 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1144 mat->getrowactive = PETSC_TRUE; 1145 1146 if (!mat->rowvalues && (idx || v)) { 1147 /* 1148 allocate enough space to hold information from the longest row. 
1149 */ 1150 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1151 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1152 int max = 1,mbs = mat->mbs,tmp; 1153 for (i=0; i<mbs; i++) { 1154 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1155 if (max < tmp) { max = tmp; } 1156 } 1157 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1158 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1159 } 1160 1161 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1162 lrow = row - brstart; /* local row index */ 1163 1164 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1165 if (!v) {pvA = 0; pvB = 0;} 1166 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1167 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1168 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1169 nztot = nzA + nzB; 1170 1171 cmap = mat->garray; 1172 if (v || idx) { 1173 if (nztot) { 1174 /* Sort by increasing column numbers, assuming A and B already sorted */ 1175 int imark = -1; 1176 if (v) { 1177 *v = v_p = mat->rowvalues; 1178 for (i=0; i<nzB; i++) { 1179 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1180 else break; 1181 } 1182 imark = i; 1183 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1184 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1185 } 1186 if (idx) { 1187 *idx = idx_p = mat->rowindices; 1188 if (imark > -1) { 1189 for (i=0; i<imark; i++) { 1190 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1191 } 1192 } else { 1193 for (i=0; i<nzB; i++) { 1194 if (cmap[cworkB[i]/bs] < cstart) 1195 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1196 else break; 1197 } 1198 imark = i; 1199 } 1200 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1201 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1202 } 1203 } else { 1204 if (idx) *idx = 0; 1205 if (v) *v = 0; 1206 } 1207 } 1208 *nz = nztot; 1209 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1216 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1217 { 1218 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1219 1220 PetscFunctionBegin; 1221 if (baij->getrowactive == PETSC_FALSE) { 1222 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1223 } 1224 baij->getrowactive = PETSC_FALSE; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1230 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1231 { 1232 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1233 1234 PetscFunctionBegin; 1235 *bs = baij->bs; 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1241 int MatZeroEntries_MPISBAIJ(Mat A) 1242 { 1243 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1244 int ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1248 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1254 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1255 { 1256 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1257 Mat A = a->A,B = a->B; 1258 int ierr; 1259 PetscReal isend[5],irecv[5]; 1260 1261 PetscFunctionBegin; 1262 info->block_size 
= (PetscReal)a->bs; 1263 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1264 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1265 isend[3] = info->memory; isend[4] = info->mallocs; 1266 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1267 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1268 isend[3] += info->memory; isend[4] += info->mallocs; 1269 if (flag == MAT_LOCAL) { 1270 info->nz_used = isend[0]; 1271 info->nz_allocated = isend[1]; 1272 info->nz_unneeded = isend[2]; 1273 info->memory = isend[3]; 1274 info->mallocs = isend[4]; 1275 } else if (flag == MAT_GLOBAL_MAX) { 1276 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1277 info->nz_used = irecv[0]; 1278 info->nz_allocated = irecv[1]; 1279 info->nz_unneeded = irecv[2]; 1280 info->memory = irecv[3]; 1281 info->mallocs = irecv[4]; 1282 } else if (flag == MAT_GLOBAL_SUM) { 1283 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1284 info->nz_used = irecv[0]; 1285 info->nz_allocated = irecv[1]; 1286 info->nz_unneeded = irecv[2]; 1287 info->memory = irecv[3]; 1288 info->mallocs = irecv[4]; 1289 } else { 1290 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1291 } 1292 info->rows_global = (PetscReal)A->M; 1293 info->columns_global = (PetscReal)A->N; 1294 info->rows_local = (PetscReal)A->m; 1295 info->columns_local = (PetscReal)A->N; 1296 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1297 info->fill_ratio_needed = 0; 1298 info->factor_mallocs = 0; 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1304 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1305 { 1306 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1307 int ierr; 1308 1309 PetscFunctionBegin; 1310 switch (op) { 1311 case MAT_NO_NEW_NONZERO_LOCATIONS: 1312 case MAT_YES_NEW_NONZERO_LOCATIONS: 1313 case MAT_COLUMNS_UNSORTED: 1314 case MAT_COLUMNS_SORTED: 1315 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1316 case MAT_KEEP_ZEROED_ROWS: 1317 case MAT_NEW_NONZERO_LOCATION_ERR: 1318 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1319 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1320 break; 1321 case MAT_ROW_ORIENTED: 1322 a->roworiented = PETSC_TRUE; 1323 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1324 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1325 break; 1326 case MAT_ROWS_SORTED: 1327 case MAT_ROWS_UNSORTED: 1328 case MAT_YES_NEW_DIAGONALS: 1329 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1330 break; 1331 case MAT_COLUMN_ORIENTED: 1332 a->roworiented = PETSC_FALSE; 1333 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1334 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1335 break; 1336 case MAT_IGNORE_OFF_PROC_ENTRIES: 1337 a->donotstash = PETSC_TRUE; 1338 break; 1339 case MAT_NO_NEW_DIAGONALS: 1340 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1341 case MAT_USE_HASH_TABLE: 1342 a->ht_flag = PETSC_TRUE; 1343 break; 1344 case MAT_NOT_SYMMETRIC: 1345 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1346 case MAT_HERMITIAN: 1347 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1348 case MAT_SYMMETRIC: 1349 case MAT_STRUCTURALLY_SYMMETRIC: 1350 case MAT_NOT_HERMITIAN: 1351 case MAT_SYMMETRY_ETERNAL: 1352 case MAT_NOT_SYMMETRY_ETERNAL: 1353 break; 1354 default: 1355 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1356 } 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1362 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1363 { 1364 int 
ierr; 1365 PetscFunctionBegin; 1366 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1372 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1373 { 1374 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1375 Mat a = baij->A,b = baij->B; 1376 int ierr,s1,s2,s3; 1377 1378 PetscFunctionBegin; 1379 if (ll != rr) { 1380 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1381 } 1382 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1383 if (rr) { 1384 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1385 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1386 /* Overlap communication with computation. */ 1387 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1388 /*} if (ll) { */ 1389 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1390 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1391 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1392 /* } */ 1393 /* scale the diagonal block */ 1394 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1395 1396 /* if (rr) { */ 1397 /* Do a scatter end and then right scale the off-diagonal block */ 1398 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1399 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1400 } 1401 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1407 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1408 { 1409 PetscFunctionBegin; 1410 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1415 int MatPrintHelp_MPISBAIJ(Mat A) 1416 { 1417 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1418 MPI_Comm comm = A->comm; 1419 static int called = 0; 1420 int ierr; 1421 1422 PetscFunctionBegin; 1423 if (!a->rank) { 1424 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1425 } 1426 if (called) {PetscFunctionReturn(0);} else called = 1; 1427 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1428 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1434 int MatSetUnfactored_MPISBAIJ(Mat A) 1435 { 1436 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1437 int ierr; 1438 1439 PetscFunctionBegin; 1440 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatEqual_MPISBAIJ" 1448 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1449 { 1450 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1451 Mat a,b,c,d; 1452 PetscTruth flg; 1453 int ierr; 1454 1455 PetscFunctionBegin; 1456 a = matA->A; b = matA->B; 1457 c = matB->A; d = matB->B; 1458 1459 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1460 if (flg == PETSC_TRUE) { 1461 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1462 } 1463 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ 
"MatSetUpPreallocation_MPISBAIJ" 1469 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1470 { 1471 int ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1480 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1481 { 1482 int i,ierr; 1483 PetscTruth flg; 1484 1485 for (i=0; i<n; i++) { 1486 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1487 if (!flg) { 1488 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1489 } 1490 } 1491 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 1496 /* -------------------------------------------------------------------*/ 1497 static struct _MatOps MatOps_Values = { 1498 MatSetValues_MPISBAIJ, 1499 MatGetRow_MPISBAIJ, 1500 MatRestoreRow_MPISBAIJ, 1501 MatMult_MPISBAIJ, 1502 /* 4*/ MatMultAdd_MPISBAIJ, 1503 MatMultTranspose_MPISBAIJ, 1504 MatMultTransposeAdd_MPISBAIJ, 1505 0, 1506 0, 1507 0, 1508 /*10*/ 0, 1509 0, 1510 0, 1511 MatRelax_MPISBAIJ, 1512 MatTranspose_MPISBAIJ, 1513 /*15*/ MatGetInfo_MPISBAIJ, 1514 MatEqual_MPISBAIJ, 1515 MatGetDiagonal_MPISBAIJ, 1516 MatDiagonalScale_MPISBAIJ, 1517 MatNorm_MPISBAIJ, 1518 /*20*/ MatAssemblyBegin_MPISBAIJ, 1519 MatAssemblyEnd_MPISBAIJ, 1520 0, 1521 MatSetOption_MPISBAIJ, 1522 MatZeroEntries_MPISBAIJ, 1523 /*25*/ MatZeroRows_MPISBAIJ, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /*35*/ MatDuplicate_MPISBAIJ, 1534 0, 1535 0, 1536 0, 1537 0, 1538 /*40*/ 0, 1539 MatGetSubMatrices_MPISBAIJ, 1540 0, 1541 MatGetValues_MPISBAIJ, 1542 0, 1543 /*45*/ MatPrintHelp_MPISBAIJ, 1544 MatScale_MPISBAIJ, 1545 0, 1546 0, 1547 0, 1548 /*50*/ MatGetBlockSize_MPISBAIJ, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /*55*/ 0, 1554 0, 1555 MatSetUnfactored_MPISBAIJ, 1556 0, 1557 MatSetValuesBlocked_MPISBAIJ, 1558 /*60*/ 0, 1559 0, 1560 0, 1561 MatGetPetscMaps_Petsc, 1562 0, 1563 /*65*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*70*/ MatGetRowMax_MPISBAIJ, 1569 0, 1570 0, 1571 0, 1572 0, 1573 /*75*/ 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*80*/ 0, 1579 0, 1580 0, 1581 0, 1582 /*85*/ MatLoad_MPISBAIJ 1583 }; 1584 1585 1586 EXTERN_C_BEGIN 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1589 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1590 { 1591 PetscFunctionBegin; 1592 *a = ((Mat_MPISBAIJ *)A->data)->A; 1593 *iscopy = PETSC_FALSE; 1594 PetscFunctionReturn(0); 1595 } 1596 EXTERN_C_END 1597 1598 EXTERN_C_BEGIN 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1601 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1602 { 1603 Mat_MPISBAIJ *b; 1604 int ierr,i,mbs,Mbs; 1605 1606 PetscFunctionBegin; 1607 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1608 1609 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1610 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1611 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1612 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1613 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1614 if (d_nnz) 
{
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->m/bs;
  Mbs = B->M/bs;
  if (mbs*bs != B->m) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by block size %d",B->m,bs);
  }

  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = mbs;
  b->Mbs = Mbs;
  b->Nbs = Mbs;

  ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  b->cstart = b->rstart;
  b->cend   = b->rend;
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;

  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
.
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1681 1682 Level: beginner 1683 1684 .seealso: MatCreateMPISBAIJ 1685 M*/ 1686 1687 EXTERN_C_BEGIN 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatCreate_MPISBAIJ" 1690 int MatCreate_MPISBAIJ(Mat B) 1691 { 1692 Mat_MPISBAIJ *b; 1693 int ierr; 1694 PetscTruth flg; 1695 1696 PetscFunctionBegin; 1697 1698 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1699 B->data = (void*)b; 1700 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1701 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1702 1703 B->ops->destroy = MatDestroy_MPISBAIJ; 1704 B->ops->view = MatView_MPISBAIJ; 1705 B->mapping = 0; 1706 B->factor = 0; 1707 B->assembled = PETSC_FALSE; 1708 1709 B->insertmode = NOT_SET_VALUES; 1710 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1711 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1712 1713 /* build local table of row and column ownerships */ 1714 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1715 b->cowners = b->rowners + b->size + 2; 1716 b->rowners_bs = b->cowners + b->size + 2; 1717 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1718 1719 /* build cache for off array entries formed */ 1720 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1721 b->donotstash = PETSC_FALSE; 1722 b->colmap = PETSC_NULL; 1723 b->garray = PETSC_NULL; 1724 b->roworiented = PETSC_TRUE; 1725 1726 #if defined(PETSC_USE_MAT_SINGLE) 1727 /* stuff for MatSetValues_XXX in single precision */ 1728 b->setvalueslen = 0; 1729 b->setvaluescopy = PETSC_NULL; 1730 #endif 1731 1732 /* stuff used in block assembly */ 1733 b->barray = 0; 1734 1735 /* stuff used for matrix vector multiply */ 1736 b->lvec = 0; 1737 b->Mvctx = 0; 1738 b->slvec0 = 0; 1739 b->slvec0b = 0; 1740 b->slvec1 = 0; 1741 b->slvec1a = 0; 1742 b->slvec1b = 0; 1743 b->sMvctx = 0; 1744 1745 /* stuff for MatGetRow() */ 1746 b->rowindices = 0; 1747 b->rowvalues = 0; 1748 b->getrowactive = PETSC_FALSE; 1749 1750 /* hash table stuff */ 1751 b->ht = 0; 1752 b->hd = 0; 1753 b->ht_size = 0; 1754 b->ht_flag = PETSC_FALSE; 1755 b->ht_fact = 0; 1756 b->ht_total_ct = 0; 1757 b->ht_insert_ct = 0; 1758 1759 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1760 if (flg) { 1761 PetscReal fact = 1.39; 1762 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1763 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1764 if (fact <= 1.0) fact = 1.39; 1765 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1766 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1767 } 1768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1769 "MatStoreValues_MPISBAIJ", 1770 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1772 "MatRetrieveValues_MPISBAIJ", 1773 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1775 "MatGetDiagonalBlock_MPISBAIJ", 1776 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1777 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1778 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1779 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 EXTERN_C_END 1783 1784 /*MC 1785 MATSBAIJ - 
MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
int MatCreate_SBAIJ(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
           room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
-  -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
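/*
   Usage sketch (illustrative only, not part of the library source): a typical
   preallocation sequence for an MPISBAIJ matrix.  The global size (100), block
   size (2), and nonzero estimates (5 and 2) are assumptions made for this
   example only.

      Mat A;
      int ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Passing d_nnz/o_nnz arrays of length (local block rows) instead of the scalar
   counts gives a per-block-row estimate; PETSC_DEFAULT for d_nz or o_nz lets
   PETSc choose default values (see MatMPISBAIJSetPreallocation_MPISBAIJ() above).
*/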

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
-  -mat_mpi - use the parallel matrix data structures even on one processor
              (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
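   Example usage (an illustrative sketch; the global size, block size, and
   nonzero estimates below are assumptions made only for this example):

.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                               5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ... insert values with MatSetValues(), giving only entries in the
          upper triangular part (including the diagonal), since only that
          portion is stored ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve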
1994 1995 Level: intermediate 1996 1997 .keywords: matrix, block, aij, compressed row, sparse, parallel 1998 1999 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2000 @*/ 2001 2002 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2003 { 2004 int ierr,size; 2005 2006 PetscFunctionBegin; 2007 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2008 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2009 if (size > 1) { 2010 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2011 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2012 } else { 2013 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2014 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2022 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2023 { 2024 Mat mat; 2025 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2026 int ierr,len=0; 2027 2028 PetscFunctionBegin; 2029 *newmat = 0; 2030 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2031 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2032 mat->preallocated = PETSC_TRUE; 2033 a = (Mat_MPISBAIJ*)mat->data; 2034 a->bs = oldmat->bs; 2035 a->bs2 = oldmat->bs2; 2036 a->mbs = oldmat->mbs; 2037 a->nbs = oldmat->nbs; 2038 a->Mbs = oldmat->Mbs; 2039 a->Nbs = oldmat->Nbs; 2040 2041 a->rstart = oldmat->rstart; 2042 a->rend = oldmat->rend; 2043 a->cstart = oldmat->cstart; 2044 a->cend = oldmat->cend; 2045 a->size = oldmat->size; 2046 a->rank = oldmat->rank; 2047 a->donotstash = oldmat->donotstash; 2048 a->roworiented = oldmat->roworiented; 2049 a->rowindices = 0; 2050 a->rowvalues = 0; 2051 a->getrowactive = PETSC_FALSE; 2052 a->barray = 0; 2053 a->rstart_bs = oldmat->rstart_bs; 2054 a->rend_bs = oldmat->rend_bs; 2055 a->cstart_bs = oldmat->cstart_bs; 2056 a->cend_bs = oldmat->cend_bs; 2057 2058 /* hash table stuff */ 2059 a->ht = 0; 2060 a->hd = 0; 2061 a->ht_size = 0; 2062 a->ht_flag = oldmat->ht_flag; 2063 a->ht_fact = oldmat->ht_fact; 2064 a->ht_total_ct = 0; 2065 a->ht_insert_ct = 0; 2066 2067 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2068 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2069 a->cowners = a->rowners + a->size + 2; 2070 a->rowners_bs = a->cowners + a->size + 2; 2071 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2072 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2073 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2074 if (oldmat->colmap) { 2075 #if defined (PETSC_USE_CTABLE) 2076 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2077 #else 2078 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2079 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2080 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2081 #endif 2082 } else a->colmap = 0; 2083 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2084 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2085 PetscLogObjectMemory(mat,len*sizeof(int)); 2086 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2087 } else a->garray = 0; 2088 2089 ierr = 
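/* duplicate the work vector and scatter context used by the parallel matrix-vector product */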
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2090 PetscLogObjectParent(mat,a->lvec); 2091 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2092 2093 PetscLogObjectParent(mat,a->Mvctx); 2094 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2095 PetscLogObjectParent(mat,a->A); 2096 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2097 PetscLogObjectParent(mat,a->B); 2098 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2099 *newmat = mat; 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #include "petscsys.h" 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatLoad_MPISBAIJ" 2107 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2108 { 2109 Mat A; 2110 int i,nz,ierr,j,rstart,rend,fd; 2111 PetscScalar *vals,*buf; 2112 MPI_Comm comm = ((PetscObject)viewer)->comm; 2113 MPI_Status status; 2114 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2115 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2116 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2117 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2118 int dcount,kmax,k,nzcount,tmp; 2119 2120 PetscFunctionBegin; 2121 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2122 2123 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2124 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2125 if (!rank) { 2126 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2127 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2128 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2129 if (header[3] < 0) { 2130 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2131 } 2132 } 2133 2134 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2135 M = header[1]; N = header[2]; 2136 2137 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2138 2139 /* 2140 This code adds extra rows to make sure the number of rows is 2141 divisible by the blocksize 2142 */ 2143 Mbs = M/bs; 2144 extra_rows = bs - M + bs*(Mbs); 2145 if (extra_rows == bs) extra_rows = 0; 2146 else Mbs++; 2147 if (extra_rows &&!rank) { 2148 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2149 } 2150 2151 /* determine ownership of all rows */ 2152 mbs = Mbs/size + ((Mbs % size) > rank); 2153 m = mbs*bs; 2154 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2155 browners = rowners + size + 1; 2156 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2157 rowners[0] = 0; 2158 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2159 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2160 rstart = rowners[rank]; 2161 rend = rowners[rank+1]; 2162 2163 /* distribute row lengths to all processors */ 2164 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2165 if (!rank) { 2166 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2167 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2168 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2169 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2170 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2171 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2172 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2173 } else { 2174 ierr = 
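/* non-root processes: the send-buffer arguments of MPI_Scatterv are ignored here;
   this process only receives the lengths of its own local rows */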
MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2175 } 2176 2177 if (!rank) { /* procs[0] */ 2178 /* calculate the number of nonzeros on each processor */ 2179 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2180 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2181 for (i=0; i<size; i++) { 2182 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2183 procsnz[i] += rowlengths[j]; 2184 } 2185 } 2186 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2187 2188 /* determine max buffer needed and allocate it */ 2189 maxnz = 0; 2190 for (i=0; i<size; i++) { 2191 maxnz = PetscMax(maxnz,procsnz[i]); 2192 } 2193 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2194 2195 /* read in my part of the matrix column indices */ 2196 nz = procsnz[0]; 2197 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2198 mycols = ibuf; 2199 if (size == 1) nz -= extra_rows; 2200 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2201 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2202 2203 /* read in every ones (except the last) and ship off */ 2204 for (i=1; i<size-1; i++) { 2205 nz = procsnz[i]; 2206 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2207 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2208 } 2209 /* read in the stuff for the last proc */ 2210 if (size != 1) { 2211 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2212 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2213 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2214 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2215 } 2216 ierr = PetscFree(cols);CHKERRQ(ierr); 2217 } else { /* procs[i], i>0 */ 2218 /* determine buffer space needed for message */ 2219 nz = 0; 2220 for (i=0; i<m; i++) { 2221 nz += locrowlens[i]; 2222 } 2223 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2224 mycols = ibuf; 2225 /* receive message of column indices*/ 2226 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2227 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2228 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2229 } 2230 2231 /* loop over local rows, determining number of off diagonal entries */ 2232 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2233 odlens = dlens + (rend-rstart); 2234 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2235 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2236 masked1 = mask + Mbs; 2237 masked2 = masked1 + Mbs; 2238 rowcount = 0; nzcount = 0; 2239 for (i=0; i<mbs; i++) { 2240 dcount = 0; 2241 odcount = 0; 2242 for (j=0; j<bs; j++) { 2243 kmax = locrowlens[rowcount]; 2244 for (k=0; k<kmax; k++) { 2245 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2246 if (!mask[tmp]) { 2247 mask[tmp] = 1; 2248 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2249 else masked1[dcount++] = tmp; /* entry in diag portion */ 2250 } 2251 } 2252 rowcount++; 2253 } 2254 2255 dlens[i] = dcount; /* d_nzz[i] */ 2256 odlens[i] = odcount; /* o_nzz[i] */ 2257 2258 /* zero out the mask elements we set */ 2259 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2260 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2261 } 2262 2263 /* create our matrix */ 2264 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2265 ierr = MatSetType(A,type);CHKERRQ(ierr); 2266 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2267 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2268 2269 if (!rank) { 2270 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2271 /* read in my part of the matrix numerical values */ 2272 nz = procsnz[0]; 2273 vals = buf; 2274 mycols = ibuf; 2275 if (size == 1) nz -= extra_rows; 2276 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2277 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2278 2279 /* insert into matrix */ 2280 jj = rstart*bs; 2281 for (i=0; i<m; i++) { 2282 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2283 mycols += locrowlens[i]; 2284 vals += locrowlens[i]; 2285 jj++; 2286 } 2287 2288 /* read in other processors (except the last one) and ship out */ 2289 for (i=1; i<size-1; i++) { 2290 nz = procsnz[i]; 2291 vals = buf; 2292 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2293 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2294 } 2295 /* the last proc */ 2296 if (size != 1){ 2297 nz = procsnz[i] - extra_rows; 2298 vals = buf; 2299 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2300 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2301 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2302 } 2303 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2304 2305 } else { 2306 /* receive numeric values */ 2307 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2308 2309 /* receive message of values*/ 2310 vals = buf; 2311 mycols = ibuf; 2312 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2313 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2314 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2315 2316 /* insert into matrix */ 2317 jj = rstart*bs; 2318 for (i=0; i<m; i++) { 2319 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2320 mycols += locrowlens[i]; 2321 vals += locrowlens[i]; 2322 jj++; 2323 } 2324 } 2325 2326 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2327 ierr = PetscFree(buf);CHKERRQ(ierr); 2328 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2329 ierr = PetscFree(rowners);CHKERRQ(ierr); 2330 ierr = PetscFree(dlens);CHKERRQ(ierr); 2331 ierr = PetscFree(mask);CHKERRQ(ierr); 2332 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2333 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2334 *newmat = A; 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2340 /*@ 2341 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2342 2343 Input Parameters: 2344 . mat - the matrix 2345 . 
fact - factor 2346 2347 Collective on Mat 2348 2349 Level: advanced 2350 2351 Notes: 2352 This can also be set by the command line option: -mat_use_hash_table fact 2353 2354 .keywords: matrix, hashtable, factor, HT 2355 2356 .seealso: MatSetOption() 2357 @*/ 2358 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2359 { 2360 PetscFunctionBegin; 2361 SETERRQ(1,"Function not yet written for SBAIJ format"); 2362 /* PetscFunctionReturn(0); */ 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2367 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2368 { 2369 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2370 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2371 PetscReal atmp; 2372 PetscReal *work,*svalues,*rvalues; 2373 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2374 int rank,size,*rowners_bs,dest,count,source; 2375 PetscScalar *va; 2376 MatScalar *ba; 2377 MPI_Status stat; 2378 2379 PetscFunctionBegin; 2380 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2381 ierr = VecGetArrayFast(v,&va);CHKERRQ(ierr); 2382 2383 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2384 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2385 2386 bs = a->bs; 2387 mbs = a->mbs; 2388 Mbs = a->Mbs; 2389 ba = b->a; 2390 bi = b->i; 2391 bj = b->j; 2392 /* 2393 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2394 PetscSynchronizedFlush(A->comm); 2395 */ 2396 2397 /* find ownerships */ 2398 rowners_bs = a->rowners_bs; 2399 /* 2400 if (!rank){ 2401 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2402 } 2403 */ 2404 2405 /* each proc creates an array to be distributed */ 2406 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2407 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2408 2409 /* row_max for B */ 2410 if (rank != size-1){ 2411 for (i=0; i<mbs; i++) { 2412 ncols = bi[1] - bi[0]; bi++; 2413 brow = bs*i; 2414 for (j=0; j<ncols; j++){ 2415 bcol = bs*(*bj); 2416 for (kcol=0; kcol<bs; kcol++){ 2417 col = bcol + kcol; /* local col index */ 2418 col += rowners_bs[rank+1]; /* global col index */ 2419 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2420 for (krow=0; krow<bs; krow++){ 2421 atmp = PetscAbsScalar(*ba); ba++; 2422 row = brow + krow; /* local row index */ 2423 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2424 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2425 if (work[col] < atmp) work[col] = atmp; 2426 } 2427 } 2428 bj++; 2429 } 2430 } 2431 /* 2432 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2433 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2434 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2435 */ 2436 2437 /* send values to its owners */ 2438 for (dest=rank+1; dest<size; dest++){ 2439 svalues = work + rowners_bs[dest]; 2440 count = rowners_bs[dest+1]-rowners_bs[dest]; 2441 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2442 /* 2443 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2444 PetscSynchronizedFlush(A->comm); 2445 */ 2446 } 2447 } 2448 2449 /* receive values */ 2450 if (rank){ 2451 rvalues = work; 2452 count = rowners_bs[rank+1]-rowners_bs[rank]; 2453 for (source=0; source<rank; source++){ 2454 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2455 /* process values */ 2456 for (i=0; i<count; i++){ 2457 if (PetscRealPart(va[i]) < 
rvalues[i]) va[i] = rvalues[i]; 2458 } 2459 /* 2460 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2461 PetscSynchronizedFlush(A->comm); 2462 */ 2463 } 2464 } 2465 2466 ierr = VecRestoreArrayFast(v,&va);CHKERRQ(ierr); 2467 ierr = PetscFree(work);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatRelax_MPISBAIJ" 2473 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2474 { 2475 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2476 int ierr,mbs=mat->mbs,bs=mat->bs; 2477 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2478 Vec bb1; 2479 2480 PetscFunctionBegin; 2481 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2482 if (bs > 1) 2483 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2484 2485 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2486 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2487 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2488 its--; 2489 } 2490 2491 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2492 while (its--){ 2493 2494 /* lower triangular part: slvec0b = - B^T*xx */ 2495 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2496 2497 /* copy xx into slvec0a */ 2498 ierr = VecGetArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr); 2499 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2500 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2501 ierr = VecRestoreArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr); 2502 2503 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2504 2505 /* copy bb into slvec1a */ 2506 ierr = VecGetArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr); 2507 ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr); 2508 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2509 ierr = VecRestoreArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr); 2510 2511 /* set slvec1b = 0 */ 2512 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2513 2514 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2515 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2516 ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr); 2517 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2518 2519 /* upper triangular part: bb1 = bb1 - B*x */ 2520 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2521 2522 /* local diagonal sweep */ 2523 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2524 } 2525 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2526 } else { 2527 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2528 } 2529 PetscFunctionReturn(0); 2530 } 2531 2532 #undef __FUNCT__ 2533 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2534 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2535 { 2536 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2537 int ierr; 2538 PetscScalar mone=-1.0; 2539 Vec lvec1,bb1; 2540 2541 PetscFunctionBegin; 2542 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2543 if (mat->bs > 1) 2544 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 
is not yet implemented"); 2545 2546 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2547 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2548 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2549 its--; 2550 } 2551 2552 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2553 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2554 while (its--){ 2555 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2556 2557 /* lower diagonal part: bb1 = bb - B^T*xx */ 2558 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2559 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2560 2561 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2562 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2563 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2564 2565 /* upper diagonal part: bb1 = bb1 - B*x */ 2566 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2567 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2568 2569 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2570 2571 /* diagonal sweep */ 2572 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2573 } 2574 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2575 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2576 } else { 2577 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2578 } 2579 PetscFunctionReturn(0); 2580 } 2581 2582
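/*
   Usage sketch (illustrative only, not part of the library source): loading a
   matrix in this format from a PETSc binary viewer.  The viewer creation is
   elided; "viewer" is assumed to be an already-opened binary PetscViewer.

      Mat A;
      int ierr;

      ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);

   The block size used while loading can be selected on the command line with
   -matload_block_size <bs>; the matrix is padded with extra rows so that the
   global size is divisible by the block size (see MatLoad_MPISBAIJ() above).
*/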