1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 112 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 113 new_j = (int*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 #ifndef MatSetValues_SeqBAIJ_B_Private 155 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 188 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 189 new_j = (int*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 198 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 #endif 231 232 #if defined(PETSC_USE_MAT_SINGLE) 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValues_MPISBAIJ" 235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 238 int ierr,i,N = m*n; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 int ierr,i,N = m*n*b->bs2; 262 MatScalar *vsingle; 263 264 PetscFunctionBegin; 265 if (N > b->setvalueslen) { 266 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 267 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 268 b->setvalueslen = N; 269 } 270 vsingle = b->setvaluescopy; 271 for (i=0; i<N; i++) { 272 vsingle[i] = v[i]; 273 } 274 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 281 { 282 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 283 int ierr,i,N = m*n; 284 MatScalar *vsingle; 285 286 PetscFunctionBegin; 287 SETERRQ(1,"Function not yet written for SBAIJ format"); 288 /* PetscFunctionReturn(0); */ 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 294 { 295 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 296 int ierr,i,N = m*n*b->bs2; 297 MatScalar *vsingle; 298 299 PetscFunctionBegin; 300 SETERRQ(1,"Function not yet written for SBAIJ format"); 301 /* PetscFunctionReturn(0); */ 302 } 303 #endif 304 305 /* Only add/insert a(i,j) with i<=j (blocks). 306 Any a(i,j) with i>j input by user is ingored. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 311 { 312 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 313 MatScalar value; 314 PetscTruth roworiented = baij->roworiented; 315 int ierr,i,j,row,col; 316 int rstart_orig=baij->rstart_bs; 317 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 318 int cend_orig=baij->cend_bs,bs=baij->bs; 319 320 /* Some Variables required in the macro */ 321 Mat A = baij->A; 322 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 323 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 324 MatScalar *aa=a->a; 325 326 Mat B = baij->B; 327 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 328 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 329 MatScalar *ba=b->a; 330 331 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 332 int low,high,t,ridx,cidx,bs2=a->bs2; 333 MatScalar *ap,*bap; 334 335 /* for stash */ 336 int n_loc, *in_loc=0; 337 MatScalar *v_loc=0; 338 339 PetscFunctionBegin; 340 341 if(!baij->donotstash){ 342 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 343 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 344 } 345 346 for (i=0; i<m; i++) { 347 if (im[i] < 0) continue; 348 #if defined(PETSC_USE_BOPT_g) 349 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 350 #endif 351 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 352 row = im[i] - rstart_orig; /* local row index */ 353 for (j=0; j<n; j++) { 354 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 355 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 356 col = in[j] - cstart_orig; /* local col index */ 357 brow = row/bs; bcol = col/bs; 358 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 359 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 360 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 361 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 362 } else if (in[j] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 365 #endif 366 else { /* off-diag entry (B) */ 367 if (mat->was_assembled) { 368 if (!baij->colmap) { 369 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 370 } 371 #if defined (PETSC_USE_CTABLE) 372 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 373 col = col - 1; 374 #else 375 col = baij->colmap[in[j]/bs] - 1; 376 #endif 377 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 378 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 379 col = in[j]; 380 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 381 B = baij->B; 382 b = (Mat_SeqBAIJ*)(B)->data; 383 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 384 ba=b->a; 385 } else col += in[j]%bs; 386 } else col = in[j]; 387 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 388 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 389 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 390 } 391 } 392 } else { /* off processor entry */ 393 if (!baij->donotstash) { 394 n_loc = 0; 395 for (j=0; j<n; j++){ 396 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 397 in_loc[n_loc] = in[j]; 398 if (roworiented) { 399 v_loc[n_loc] = v[i*n+j]; 400 } else { 401 v_loc[n_loc] = v[j*m+i]; 402 } 403 n_loc++; 404 } 405 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 406 } 407 } 408 } 409 410 if(!baij->donotstash){ 411 ierr = PetscFree(in_loc);CHKERRQ(ierr); 412 ierr = PetscFree(v_loc);CHKERRQ(ierr); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 420 { 421 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 422 const MatScalar *value; 423 MatScalar *barray=baij->barray; 424 PetscTruth roworiented = baij->roworiented; 425 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 426 int rend=baij->rend,cstart=baij->cstart,stepval; 427 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 428 429 PetscFunctionBegin; 430 if(!barray) { 431 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 432 baij->barray = barray; 433 } 434 435 if (roworiented) { 436 stepval = (n-1)*bs; 437 } else { 438 stepval = (m-1)*bs; 439 } 440 for (i=0; i<m; i++) { 441 if (im[i] < 0) continue; 442 #if defined(PETSC_USE_BOPT_g) 443 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 444 #endif 445 if (im[i] >= rstart && im[i] < rend) { 446 row = im[i] - rstart; 447 for (j=0; j<n; j++) { 448 /* If NumCol = 1 then a copy is not required */ 449 if ((roworiented) && (n == 1)) { 450 barray = (MatScalar*) v + i*bs2; 451 } else if((!roworiented) && (m == 1)) { 452 barray = (MatScalar*) v + j*bs2; 453 } else { /* Here a copy is required */ 454 if (roworiented) { 455 value = v + i*(stepval+bs)*bs + j*bs; 456 } else { 457 value = v + j*(stepval+bs)*bs + i*bs; 458 } 459 for (ii=0; ii<bs; ii++,value+=stepval) { 460 for (jj=0; jj<bs; jj++) { 461 *barray++ = *value++; 462 } 463 } 464 barray -=bs2; 465 } 466 467 if (in[j] >= cstart && in[j] < cend){ 468 col = in[j] - cstart; 469 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 470 } 471 else if (in[j] < 0) continue; 472 #if defined(PETSC_USE_BOPT_g) 473 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 474 #endif 475 else { 476 if (mat->was_assembled) { 477 if (!baij->colmap) { 478 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 479 } 480 481 #if defined(PETSC_USE_BOPT_g) 482 #if defined (PETSC_USE_CTABLE) 483 { int data; 484 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 485 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 486 } 487 #else 488 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 489 #endif 490 #endif 491 #if defined (PETSC_USE_CTABLE) 492 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 493 col = (col - 1)/bs; 494 #else 495 col = (baij->colmap[in[j]] - 1)/bs; 496 #endif 497 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 498 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 499 col = in[j]; 500 } 501 } 502 else col = in[j]; 503 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 504 } 505 } 506 } else { 507 if (!baij->donotstash) { 508 if (roworiented) { 509 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 510 } else { 511 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 512 } 513 } 514 } 515 } 516 PetscFunctionReturn(0); 517 } 518 519 #define HASH_KEY 0.6180339887 520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 526 { 527 PetscFunctionBegin; 528 SETERRQ(1,"Function not yet written for SBAIJ format"); 529 /* PetscFunctionReturn(0); */ 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 535 { 536 PetscFunctionBegin; 537 SETERRQ(1,"Function not yet written for SBAIJ format"); 538 /* PetscFunctionReturn(0); */ 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatGetValues_MPISBAIJ" 543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 544 { 545 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 546 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 547 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 552 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 553 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 554 row = idxm[i] - bsrstart; 555 for (j=0; j<n; j++) { 556 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 557 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 558 if (idxn[j] >= bscstart && idxn[j] < bscend){ 559 col = idxn[j] - bscstart; 560 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 561 } else { 562 if (!baij->colmap) { 563 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 564 } 565 #if defined (PETSC_USE_CTABLE) 566 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 567 data --; 568 #else 569 data = baij->colmap[idxn[j]/bs]-1; 570 #endif 571 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 572 else { 573 col = data + idxn[j]%bs; 574 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575 } 576 } 577 } 578 } else { 579 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatNorm_MPISBAIJ" 587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 588 { 589 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 590 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 591 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 592 int ierr; 593 PetscReal sum[2],*lnorm2; 594 595 PetscFunctionBegin; 596 if (baij->size == 1) { 597 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 598 } else { 599 if (type == NORM_FROBENIUS) { 600 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 601 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 602 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 603 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 605 /* 606 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 607 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 608 */ 609 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 610 /* 611 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 612 PetscSynchronizedFlush(mat->comm); */ 613 614 *norm = sqrt(sum[0] + 2*sum[1]); 615 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 /* 624 Creates the hash table, and sets the table 625 This table is created only once. 626 If new entried need to be added to the matrix 627 then the hash table has to be destroyed and 628 recreated. 629 */ 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 633 { 634 PetscFunctionBegin; 635 SETERRQ(1,"Function not yet written for SBAIJ format"); 636 /* PetscFunctionReturn(0); */ 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 642 { 643 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 644 int ierr,nstash,reallocs; 645 InsertMode addv; 646 647 PetscFunctionBegin; 648 if (baij->donotstash) { 649 PetscFunctionReturn(0); 650 } 651 652 /* make sure all processors are either in INSERTMODE or ADDMODE */ 653 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 654 if (addv == (ADD_VALUES|INSERT_VALUES)) { 655 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 656 } 657 mat->insertmode = addv; /* in case this processor had no cache */ 658 659 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 673 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 674 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 675 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 676 int *row,*col,other_disassembled; 677 PetscTruth r1,r2,r3; 678 MatScalar *val; 679 InsertMode addv = mat->insertmode; 680 681 PetscFunctionBegin; 682 683 if (!baij->donotstash) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 /* 687 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 688 PetscSynchronizedFlush(mat->comm); 689 */ 690 if (!flg) break; 691 692 for (i=0; i<n;) { 693 /* Now identify the consecutive vals belonging to the same row */ 694 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 695 if (j < n) ncols = j-i; 696 else ncols = n-i; 697 /* Now assemble all these values with a single function call */ 698 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 699 i = j; 700 } 701 } 702 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 703 /* Now process the block-stash. Since the values are stashed column-oriented, 704 set the roworiented flag to column oriented, and after MatSetValues() 705 restore the original flags */ 706 r1 = baij->roworiented; 707 r2 = a->roworiented; 708 r3 = b->roworiented; 709 baij->roworiented = PETSC_FALSE; 710 a->roworiented = PETSC_FALSE; 711 b->roworiented = PETSC_FALSE; 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n;) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 719 if (j < n) ncols = j-i; 720 else ncols = n-i; 721 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 722 i = j; 723 } 724 } 725 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 726 baij->roworiented = r1; 727 a->roworiented = r2; 728 b->roworiented = r3; 729 } 730 731 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 741 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 747 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 748 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 749 } 750 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 752 753 #if defined(PETSC_USE_BOPT_g) 754 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 755 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 756 baij->ht_total_ct = 0; 757 baij->ht_insert_ct = 0; 758 } 759 #endif 760 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 761 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 762 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 763 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 764 } 765 766 if (baij->rowvalues) { 767 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 768 baij->rowvalues = 0; 769 } 770 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 779 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 780 PetscTruth isascii,isdraw; 781 PetscViewer sviewer; 782 PetscViewerFormat format; 783 784 PetscFunctionBegin; 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 787 if (isascii) { 788 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 789 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 790 MatInfo info; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 794 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 795 baij->bs,(int)info.memory);CHKERRQ(ierr); 796 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 797 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 798 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 799 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 800 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 801 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } else if (format == PETSC_VIEWER_ASCII_INFO) { 804 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 } 808 809 if (isdraw) { 810 PetscDraw draw; 811 PetscTruth isnull; 812 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 813 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 814 } 815 816 if (size == 1) { 817 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 818 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 819 } else { 820 /* assemble the entire matrix onto first processor. */ 821 Mat A; 822 Mat_SeqSBAIJ *Aloc; 823 Mat_SeqBAIJ *Bloc; 824 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 825 MatScalar *a; 826 827 if (!rank) { 828 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 829 } else { 830 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 831 } 832 PetscLogObjectParent(mat,A); 833 834 /* copy over the A part */ 835 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 836 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 837 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 838 839 for (i=0; i<mbs; i++) { 840 rvals[0] = bs*(baij->rstart + i); 841 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 842 for (j=ai[i]; j<ai[i+1]; j++) { 843 col = (baij->cstart+aj[j])*bs; 844 for (k=0; k<bs; k++) { 845 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 846 col++; a += bs; 847 } 848 } 849 } 850 /* copy over the B part */ 851 Bloc = (Mat_SeqBAIJ*)baij->B->data; 852 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 853 for (i=0; i<mbs; i++) { 854 rvals[0] = bs*(baij->rstart + i); 855 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 856 for (j=ai[i]; j<ai[i+1]; j++) { 857 col = baij->garray[aj[j]]*bs; 858 for (k=0; k<bs; k++) { 859 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 860 col++; a += bs; 861 } 862 } 863 } 864 ierr = PetscFree(rvals);CHKERRQ(ierr); 865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 867 /* 868 Everyone has to call to draw the matrix since the graphics waits are 869 synchronized across all processors that share the PetscDraw object 870 */ 871 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 872 if (!rank) { 873 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 874 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 875 } 876 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 877 ierr = MatDestroy(A);CHKERRQ(ierr); 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatView_MPISBAIJ" 884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 885 { 886 int ierr; 887 PetscTruth isascii,isdraw,issocket,isbinary; 888 889 PetscFunctionBegin; 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 894 if (isascii || isdraw || issocket || isbinary) { 895 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 896 } else { 897 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatDestroy_MPISBAIJ" 904 int MatDestroy_MPISBAIJ(Mat mat) 905 { 906 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 907 int ierr; 908 909 PetscFunctionBegin; 910 #if defined(PETSC_USE_LOG) 911 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 912 #endif 913 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 914 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 915 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 916 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 917 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 918 #if defined (PETSC_USE_CTABLE) 919 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 920 #else 921 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 922 #endif 923 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 924 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 925 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 926 if (baij->slvec0) { 927 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 928 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 929 } 930 if (baij->slvec1) { 931 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 932 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 933 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 934 } 935 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 936 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 937 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 938 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 939 #if defined(PETSC_USE_MAT_SINGLE) 940 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 941 #endif 942 ierr = PetscFree(baij);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatMult_MPISBAIJ" 948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 949 { 950 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 951 int ierr,nt,mbs=a->mbs,bs=a->bs; 952 PetscScalar *x,*from,zero=0.0; 953 954 PetscFunctionBegin; 955 /* 956 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 957 PetscSynchronizedFlush(A->comm); 958 */ 959 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 960 if (nt != A->n) { 961 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 962 } 963 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 964 if (nt != A->m) { 965 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 966 } 967 968 /* diagonal part */ 969 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 970 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 971 972 /* subdiagonal part */ 973 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 974 975 /* copy x into the vec slvec0 */ 976 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 977 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 978 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 979 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 980 981 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 982 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 984 985 /* supperdiagonal part */ 986 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 987 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 993 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 994 { 995 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 996 int ierr,nt; 997 998 PetscFunctionBegin; 999 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1000 if (nt != A->n) { 1001 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1002 } 1003 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1004 if (nt != A->m) { 1005 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1006 } 1007 1008 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1009 /* do diagonal part */ 1010 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1011 /* do supperdiagonal part */ 1012 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1013 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1014 /* do subdiagonal part */ 1015 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1016 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1017 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1018 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1024 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1025 { 1026 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1027 int ierr,mbs=a->mbs,bs=a->bs; 1028 PetscScalar *x,*from,zero=0.0; 1029 1030 PetscFunctionBegin; 1031 /* 1032 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1033 PetscSynchronizedFlush(A->comm); 1034 */ 1035 /* diagonal part */ 1036 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1037 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1038 1039 /* subdiagonal part */ 1040 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1041 1042 /* copy x into the vec slvec0 */ 1043 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1046 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1047 1048 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1049 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1051 1052 /* supperdiagonal part */ 1053 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1054 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1060 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1061 { 1062 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1063 int ierr; 1064 1065 PetscFunctionBegin; 1066 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1067 /* do diagonal part */ 1068 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1069 /* do supperdiagonal part */ 1070 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1071 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1072 1073 /* do subdiagonal part */ 1074 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1075 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1077 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1083 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1084 { 1085 PetscFunctionBegin; 1086 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1087 /* PetscFunctionReturn(0); */ 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1092 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1093 { 1094 PetscFunctionBegin; 1095 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1096 /* PetscFunctionReturn(0); */ 1097 } 1098 1099 /* 1100 This only works correctly for square matrices where the subblock A->A is the 1101 diagonal block 1102 */ 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1106 { 1107 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1108 int ierr; 1109 1110 PetscFunctionBegin; 1111 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1112 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatScale_MPISBAIJ" 1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1119 { 1120 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1121 int ierr; 1122 1123 PetscFunctionBegin; 1124 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1125 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1132 { 1133 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1134 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1135 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1136 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1137 int *cmap,*idx_p,cstart = mat->cstart; 1138 1139 PetscFunctionBegin; 1140 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1141 mat->getrowactive = PETSC_TRUE; 1142 1143 if (!mat->rowvalues && (idx || v)) { 1144 /* 1145 allocate enough space to hold information from the longest row. 1146 */ 1147 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1148 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1149 int max = 1,mbs = mat->mbs,tmp; 1150 for (i=0; i<mbs; i++) { 1151 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1152 if (max < tmp) { max = tmp; } 1153 } 1154 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1155 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1156 } 1157 1158 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1159 lrow = row - brstart; /* local row index */ 1160 1161 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1162 if (!v) {pvA = 0; pvB = 0;} 1163 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1164 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1165 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1166 nztot = nzA + nzB; 1167 1168 cmap = mat->garray; 1169 if (v || idx) { 1170 if (nztot) { 1171 /* Sort by increasing column numbers, assuming A and B already sorted */ 1172 int imark = -1; 1173 if (v) { 1174 *v = v_p = mat->rowvalues; 1175 for (i=0; i<nzB; i++) { 1176 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1177 else break; 1178 } 1179 imark = i; 1180 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1181 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1182 } 1183 if (idx) { 1184 *idx = idx_p = mat->rowindices; 1185 if (imark > -1) { 1186 for (i=0; i<imark; i++) { 1187 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1188 } 1189 } else { 1190 for (i=0; i<nzB; i++) { 1191 if (cmap[cworkB[i]/bs] < cstart) 1192 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1193 else break; 1194 } 1195 imark = i; 1196 } 1197 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1198 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1199 } 1200 } else { 1201 if (idx) *idx = 0; 1202 if (v) *v = 0; 1203 } 1204 } 1205 *nz = nztot; 1206 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1207 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1214 { 1215 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1216 1217 PetscFunctionBegin; 1218 if (baij->getrowactive == PETSC_FALSE) { 1219 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1220 } 1221 baij->getrowactive = PETSC_FALSE; 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1228 { 1229 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1230 1231 PetscFunctionBegin; 1232 *bs = baij->bs; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1238 int MatZeroEntries_MPISBAIJ(Mat A) 1239 { 1240 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1245 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1252 { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1254 Mat A = a->A,B = a->B; 1255 int ierr; 1256 PetscReal isend[5],irecv[5]; 1257 1258 PetscFunctionBegin; 1259 info->block_size = (PetscReal)a->bs; 1260 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1261 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1262 isend[3] = info->memory; isend[4] = info->mallocs; 1263 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1264 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1265 isend[3] += info->memory; isend[4] += info->mallocs; 1266 if (flag == MAT_LOCAL) { 1267 info->nz_used = isend[0]; 1268 info->nz_allocated = isend[1]; 1269 info->nz_unneeded = isend[2]; 1270 info->memory = isend[3]; 1271 info->mallocs = isend[4]; 1272 } else if (flag == MAT_GLOBAL_MAX) { 1273 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1274 info->nz_used = irecv[0]; 1275 info->nz_allocated = irecv[1]; 1276 info->nz_unneeded = irecv[2]; 1277 info->memory = irecv[3]; 1278 info->mallocs = irecv[4]; 1279 } else if (flag == MAT_GLOBAL_SUM) { 1280 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1281 info->nz_used = irecv[0]; 1282 info->nz_allocated = irecv[1]; 1283 info->nz_unneeded = irecv[2]; 1284 info->memory = irecv[3]; 1285 info->mallocs = irecv[4]; 1286 } else { 1287 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1288 } 1289 info->rows_global = (PetscReal)A->M; 1290 info->columns_global = (PetscReal)A->N; 1291 info->rows_local = (PetscReal)A->m; 1292 info->columns_local = (PetscReal)A->N; 1293 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1294 info->fill_ratio_needed = 0; 1295 info->factor_mallocs = 0; 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1302 { 1303 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1304 int ierr; 1305 1306 PetscFunctionBegin; 1307 switch (op) { 1308 case MAT_NO_NEW_NONZERO_LOCATIONS: 1309 case MAT_YES_NEW_NONZERO_LOCATIONS: 1310 case MAT_COLUMNS_UNSORTED: 1311 case MAT_COLUMNS_SORTED: 1312 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1313 case MAT_KEEP_ZEROED_ROWS: 1314 case MAT_NEW_NONZERO_LOCATION_ERR: 1315 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1316 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1317 break; 1318 case MAT_ROW_ORIENTED: 1319 a->roworiented = PETSC_TRUE; 1320 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1321 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1322 break; 1323 case MAT_ROWS_SORTED: 1324 case MAT_ROWS_UNSORTED: 1325 case MAT_YES_NEW_DIAGONALS: 1326 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1327 break; 1328 case MAT_COLUMN_ORIENTED: 1329 a->roworiented = PETSC_FALSE; 1330 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1331 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1332 break; 1333 case MAT_IGNORE_OFF_PROC_ENTRIES: 1334 a->donotstash = PETSC_TRUE; 1335 break; 1336 case MAT_NO_NEW_DIAGONALS: 1337 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1338 case MAT_USE_HASH_TABLE: 1339 a->ht_flag = PETSC_TRUE; 1340 break; 1341 case MAT_NOT_SYMMETRIC: 1342 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1343 case MAT_HERMITIAN: 1344 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1345 case MAT_SYMMETRIC: 1346 case MAT_STRUCTURALLY_SYMMETRIC: 1347 case MAT_NOT_HERMITIAN: 1348 case MAT_SYMMETRY_ETERNAL: 1349 case MAT_NOT_SYMMETRY_ETERNAL: 1350 break; 1351 default: 1352 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1360 { 1361 int ierr; 1362 PetscFunctionBegin; 1363 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1370 { 1371 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1372 Mat a = baij->A,b = baij->B; 1373 int ierr,s1,s2,s3; 1374 1375 PetscFunctionBegin; 1376 if (ll != rr) { 1377 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1378 } 1379 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1380 if (rr) { 1381 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1382 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1383 /* Overlap communication with computation. */ 1384 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1385 /*} if (ll) { */ 1386 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1387 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1388 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1389 /* } */ 1390 /* scale the diagonal block */ 1391 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1392 1393 /* if (rr) { */ 1394 /* Do a scatter end and then right scale the off-diagonal block */ 1395 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1396 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1397 } 1398 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1405 { 1406 PetscFunctionBegin; 1407 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1412 int MatPrintHelp_MPISBAIJ(Mat A) 1413 { 1414 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1415 MPI_Comm comm = A->comm; 1416 static int called = 0; 1417 int ierr; 1418 1419 PetscFunctionBegin; 1420 if (!a->rank) { 1421 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1422 } 1423 if (called) {PetscFunctionReturn(0);} else called = 1; 1424 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1425 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1431 int MatSetUnfactored_MPISBAIJ(Mat A) 1432 { 1433 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatEqual_MPISBAIJ" 1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1446 { 1447 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1448 Mat a,b,c,d; 1449 PetscTruth flg; 1450 int ierr; 1451 1452 PetscFunctionBegin; 1453 a = matA->A; b = matA->B; 1454 c = matB->A; d = matB->B; 1455 1456 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1457 if (flg == PETSC_TRUE) { 1458 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1459 } 1460 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1466 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1467 { 1468 int ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1477 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1478 { 1479 int i,ierr; 1480 PetscTruth flg; 1481 1482 for (i=0; i<n; i++) { 1483 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1484 if (!flg) { 1485 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1486 } 1487 } 1488 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 1493 /* -------------------------------------------------------------------*/ 1494 static struct _MatOps MatOps_Values = { 1495 MatSetValues_MPISBAIJ, 1496 MatGetRow_MPISBAIJ, 1497 MatRestoreRow_MPISBAIJ, 1498 MatMult_MPISBAIJ, 1499 /* 4*/ MatMultAdd_MPISBAIJ, 1500 MatMultTranspose_MPISBAIJ, 1501 MatMultTransposeAdd_MPISBAIJ, 1502 0, 1503 0, 1504 0, 1505 /*10*/ 0, 1506 0, 1507 0, 1508 MatRelax_MPISBAIJ, 1509 MatTranspose_MPISBAIJ, 1510 /*15*/ MatGetInfo_MPISBAIJ, 1511 MatEqual_MPISBAIJ, 1512 MatGetDiagonal_MPISBAIJ, 1513 MatDiagonalScale_MPISBAIJ, 1514 MatNorm_MPISBAIJ, 1515 /*20*/ MatAssemblyBegin_MPISBAIJ, 1516 MatAssemblyEnd_MPISBAIJ, 1517 0, 1518 MatSetOption_MPISBAIJ, 1519 MatZeroEntries_MPISBAIJ, 1520 /*25*/ MatZeroRows_MPISBAIJ, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*35*/ MatDuplicate_MPISBAIJ, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*40*/ 0, 1536 MatGetSubMatrices_MPISBAIJ, 1537 0, 1538 MatGetValues_MPISBAIJ, 1539 0, 1540 /*45*/ MatPrintHelp_MPISBAIJ, 1541 MatScale_MPISBAIJ, 1542 0, 1543 0, 1544 0, 1545 /*50*/ MatGetBlockSize_MPISBAIJ, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*55*/ 0, 1551 0, 1552 MatSetUnfactored_MPISBAIJ, 1553 0, 1554 MatSetValuesBlocked_MPISBAIJ, 1555 /*60*/ 0, 1556 0, 1557 0, 1558 MatGetPetscMaps_Petsc, 1559 0, 1560 /*65*/ 0, 1561 0, 1562 0, 1563 0, 1564 0, 1565 /*70*/ MatGetRowMax_MPISBAIJ, 1566 0, 1567 0, 1568 0, 1569 0, 1570 /*75*/ 0, 1571 0, 1572 0, 1573 0, 1574 0, 1575 /*80*/ 0, 1576 0, 1577 0, 1578 0, 1579 /*85*/ MatLoad_MPISBAIJ 1580 }; 1581 1582 1583 EXTERN_C_BEGIN 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1586 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1587 { 1588 PetscFunctionBegin; 1589 *a = ((Mat_MPISBAIJ *)A->data)->A; 1590 *iscopy = PETSC_FALSE; 1591 PetscFunctionReturn(0); 1592 } 1593 EXTERN_C_END 1594 1595 EXTERN_C_BEGIN 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1598 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1599 { 1600 Mat_MPISBAIJ *b; 1601 int ierr,i,mbs,Mbs; 1602 1603 PetscFunctionBegin; 1604 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1605 1606 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1607 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1608 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1609 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1610 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1611 if (d_nnz) { 1612 for (i=0; i<B->m/bs; i++) { 1613 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1614 } 1615 } 1616 if (o_nnz) { 1617 for (i=0; i<B->m/bs; i++) { 1618 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1619 } 1620 } 1621 B->preallocated = PETSC_TRUE; 1622 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1623 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1624 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1625 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1626 1627 b = (Mat_MPISBAIJ*)B->data; 1628 mbs = B->m/bs; 1629 Mbs = B->M/bs; 1630 if (mbs*bs != B->m) { 1631 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1632 } 1633 1634 b->bs = bs; 1635 b->bs2 = bs*bs; 1636 b->mbs = mbs; 1637 b->nbs = mbs; 1638 b->Mbs = Mbs; 1639 b->Nbs = Mbs; 1640 1641 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1642 b->rowners[0] = 0; 1643 for (i=2; i<=b->size; i++) { 1644 b->rowners[i] += b->rowners[i-1]; 1645 } 1646 b->rstart = b->rowners[b->rank]; 1647 b->rend = b->rowners[b->rank+1]; 1648 b->cstart = b->rstart; 1649 b->cend = b->rend; 1650 for (i=0; i<=b->size; i++) { 1651 b->rowners_bs[i] = b->rowners[i]*bs; 1652 } 1653 b->rstart_bs = b-> rstart*bs; 1654 b->rend_bs = b->rend*bs; 1655 1656 b->cstart_bs = b->cstart*bs; 1657 b->cend_bs = b->cend*bs; 1658 1659 1660 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1661 PetscLogObjectParent(B,b->A); 1662 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1663 PetscLogObjectParent(B,b->B); 1664 1665 /* build cache for off array entries formed */ 1666 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1667 1668 PetscFunctionReturn(0); 1669 } 1670 EXTERN_C_END 1671 1672 /*MC 1673 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1674 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1675 1676 Options Database Keys: 1677 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1678 1679 Level: beginner 1680 1681 .seealso: MatCreateMPISBAIJ 1682 M*/ 1683 1684 EXTERN_C_BEGIN 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatCreate_MPISBAIJ" 1687 int MatCreate_MPISBAIJ(Mat B) 1688 { 1689 Mat_MPISBAIJ *b; 1690 int ierr; 1691 PetscTruth flg; 1692 1693 PetscFunctionBegin; 1694 1695 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1696 B->data = (void*)b; 1697 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1698 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1699 1700 B->ops->destroy = MatDestroy_MPISBAIJ; 1701 B->ops->view = MatView_MPISBAIJ; 1702 B->mapping = 0; 1703 B->factor = 0; 1704 B->assembled = PETSC_FALSE; 1705 1706 B->insertmode = NOT_SET_VALUES; 1707 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1708 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1709 1710 /* build local table of row and column ownerships */ 1711 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1712 b->cowners = b->rowners + b->size + 2; 1713 b->rowners_bs = b->cowners + b->size + 2; 1714 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1715 1716 /* build cache for off array entries formed */ 1717 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1718 b->donotstash = PETSC_FALSE; 1719 b->colmap = PETSC_NULL; 1720 b->garray = PETSC_NULL; 1721 b->roworiented = PETSC_TRUE; 1722 1723 #if defined(PETSC_USE_MAT_SINGLE) 1724 /* stuff for MatSetValues_XXX in single precision */ 1725 b->setvalueslen = 0; 1726 b->setvaluescopy = PETSC_NULL; 1727 #endif 1728 1729 /* stuff used in block assembly */ 1730 b->barray = 0; 1731 1732 /* stuff used for matrix vector multiply */ 1733 b->lvec = 0; 1734 b->Mvctx = 0; 1735 b->slvec0 = 0; 1736 b->slvec0b = 0; 1737 b->slvec1 = 0; 1738 b->slvec1a = 0; 1739 b->slvec1b = 0; 1740 b->sMvctx = 0; 1741 1742 /* stuff for MatGetRow() */ 1743 b->rowindices = 0; 1744 b->rowvalues = 0; 1745 b->getrowactive = PETSC_FALSE; 1746 1747 /* hash table stuff */ 1748 b->ht = 0; 1749 b->hd = 0; 1750 b->ht_size = 0; 1751 b->ht_flag = PETSC_FALSE; 1752 b->ht_fact = 0; 1753 b->ht_total_ct = 0; 1754 b->ht_insert_ct = 0; 1755 1756 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1757 if (flg) { 1758 PetscReal fact = 1.39; 1759 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1760 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1761 if (fact <= 1.0) fact = 1.39; 1762 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1763 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1764 } 1765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1766 "MatStoreValues_MPISBAIJ", 1767 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1769 "MatRetrieveValues_MPISBAIJ", 1770 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1772 "MatGetDiagonalBlock_MPISBAIJ", 1773 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1775 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1776 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 EXTERN_C_END 1780 1781 /*MC 1782 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1783 1784 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1785 and MATMPISBAIJ otherwise. 1786 1787 Options Database Keys: 1788 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1789 1790 Level: beginner 1791 1792 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1793 M*/ 1794 1795 EXTERN_C_BEGIN 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatCreate_SBAIJ" 1798 int MatCreate_SBAIJ(Mat A) { 1799 int ierr,size; 1800 1801 PetscFunctionBegin; 1802 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1803 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1804 if (size == 1) { 1805 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1808 } 1809 PetscFunctionReturn(0); 1810 } 1811 EXTERN_C_END 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1815 /*@C 1816 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1817 the user should preallocate the matrix storage by setting the parameters 1818 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1819 performance can be increased by more than a factor of 50. 1820 1821 Collective on Mat 1822 1823 Input Parameters: 1824 + A - the matrix 1825 . bs - size of blockk 1826 . d_nz - number of block nonzeros per block row in diagonal portion of local 1827 submatrix (same for all local rows) 1828 . d_nnz - array containing the number of block nonzeros in the various block rows 1829 in the upper triangular and diagonal part of the in diagonal portion of the local 1830 (possibly different for each block row) or PETSC_NULL. You must leave room 1831 for the diagonal entry even if it is zero. 1832 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1833 submatrix (same for all local rows). 1834 - o_nnz - array containing the number of nonzeros in the various block rows of the 1835 off-diagonal portion of the local submatrix (possibly different for 1836 each block row) or PETSC_NULL. 1837 1838 1839 Options Database Keys: 1840 . -mat_no_unroll - uses code that does not unroll the loops in the 1841 block calculations (much slower) 1842 . -mat_block_size - size of the blocks to use 1843 1844 Notes: 1845 1846 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1847 than it must be used on all processors that share the object for that argument. 1848 1849 Storage Information: 1850 For a square global matrix we define each processor's diagonal portion 1851 to be its local rows and the corresponding columns (a square submatrix); 1852 each processor's off-diagonal portion encompasses the remainder of the 1853 local matrix (a rectangular submatrix). 1854 1855 The user can specify preallocated storage for the diagonal part of 1856 the local submatrix with either d_nz or d_nnz (not both). Set 1857 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1858 memory allocation. Likewise, specify preallocated storage for the 1859 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1860 1861 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1862 the figure below we depict these three local rows and all columns (0-11). 1863 1864 .vb 1865 0 1 2 3 4 5 6 7 8 9 10 11 1866 ------------------- 1867 row 3 | o o o d d d o o o o o o 1868 row 4 | o o o d d d o o o o o o 1869 row 5 | o o o d d d o o o o o o 1870 ------------------- 1871 .ve 1872 1873 Thus, any entries in the d locations are stored in the d (diagonal) 1874 submatrix, and any entries in the o locations are stored in the 1875 o (off-diagonal) submatrix. Note that the d matrix is stored in 1876 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1877 1878 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1879 plus the diagonal part of the d matrix, 1880 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1881 In general, for PDE problems in which most nonzeros are near the diagonal, 1882 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1883 or you will get TERRIBLE performance; see the users' manual chapter on 1884 matrices. 1885 1886 Level: intermediate 1887 1888 .keywords: matrix, block, aij, compressed row, sparse, parallel 1889 1890 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1891 @*/ 1892 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1893 { 1894 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1895 1896 PetscFunctionBegin; 1897 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1898 if (f) { 1899 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatCreateMPISBAIJ" 1906 /*@C 1907 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1908 (block compressed row). For good matrix assembly performance 1909 the user should preallocate the matrix storage by setting the parameters 1910 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1911 performance can be increased by more than a factor of 50. 1912 1913 Collective on MPI_Comm 1914 1915 Input Parameters: 1916 + comm - MPI communicator 1917 . bs - size of blockk 1918 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1919 This value should be the same as the local size used in creating the 1920 y vector for the matrix-vector product y = Ax. 1921 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1922 This value should be the same as the local size used in creating the 1923 x vector for the matrix-vector product y = Ax. 1924 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1925 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1926 . d_nz - number of block nonzeros per block row in diagonal portion of local 1927 submatrix (same for all local rows) 1928 . d_nnz - array containing the number of block nonzeros in the various block rows 1929 in the upper triangular portion of the in diagonal portion of the local 1930 (possibly different for each block block row) or PETSC_NULL. 1931 You must leave room for the diagonal entry even if it is zero. 1932 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1933 submatrix (same for all local rows). 1934 - o_nnz - array containing the number of nonzeros in the various block rows of the 1935 off-diagonal portion of the local submatrix (possibly different for 1936 each block row) or PETSC_NULL. 1937 1938 Output Parameter: 1939 . A - the matrix 1940 1941 Options Database Keys: 1942 . -mat_no_unroll - uses code that does not unroll the loops in the 1943 block calculations (much slower) 1944 . -mat_block_size - size of the blocks to use 1945 . -mat_mpi - use the parallel matrix data structures even on one processor 1946 (defaults to using SeqBAIJ format on one processor) 1947 1948 Notes: 1949 The user MUST specify either the local or global matrix dimensions 1950 (possibly both). 1951 1952 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1953 than it must be used on all processors that share the object for that argument. 1954 1955 Storage Information: 1956 For a square global matrix we define each processor's diagonal portion 1957 to be its local rows and the corresponding columns (a square submatrix); 1958 each processor's off-diagonal portion encompasses the remainder of the 1959 local matrix (a rectangular submatrix). 1960 1961 The user can specify preallocated storage for the diagonal part of 1962 the local submatrix with either d_nz or d_nnz (not both). Set 1963 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1964 memory allocation. Likewise, specify preallocated storage for the 1965 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1966 1967 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1968 the figure below we depict these three local rows and all columns (0-11). 1969 1970 .vb 1971 0 1 2 3 4 5 6 7 8 9 10 11 1972 ------------------- 1973 row 3 | o o o d d d o o o o o o 1974 row 4 | o o o d d d o o o o o o 1975 row 5 | o o o d d d o o o o o o 1976 ------------------- 1977 .ve 1978 1979 Thus, any entries in the d locations are stored in the d (diagonal) 1980 submatrix, and any entries in the o locations are stored in the 1981 o (off-diagonal) submatrix. Note that the d matrix is stored in 1982 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1983 1984 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1985 plus the diagonal part of the d matrix, 1986 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1987 In general, for PDE problems in which most nonzeros are near the diagonal, 1988 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1989 or you will get TERRIBLE performance; see the users' manual chapter on 1990 matrices. 1991 1992 Level: intermediate 1993 1994 .keywords: matrix, block, aij, compressed row, sparse, parallel 1995 1996 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1997 @*/ 1998 1999 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2000 { 2001 int ierr,size; 2002 2003 PetscFunctionBegin; 2004 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2005 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2006 if (size > 1) { 2007 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2008 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2009 } else { 2010 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2011 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2019 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2020 { 2021 Mat mat; 2022 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2023 int ierr,len=0; 2024 2025 PetscFunctionBegin; 2026 *newmat = 0; 2027 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2028 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2029 mat->preallocated = PETSC_TRUE; 2030 a = (Mat_MPISBAIJ*)mat->data; 2031 a->bs = oldmat->bs; 2032 a->bs2 = oldmat->bs2; 2033 a->mbs = oldmat->mbs; 2034 a->nbs = oldmat->nbs; 2035 a->Mbs = oldmat->Mbs; 2036 a->Nbs = oldmat->Nbs; 2037 2038 a->rstart = oldmat->rstart; 2039 a->rend = oldmat->rend; 2040 a->cstart = oldmat->cstart; 2041 a->cend = oldmat->cend; 2042 a->size = oldmat->size; 2043 a->rank = oldmat->rank; 2044 a->donotstash = oldmat->donotstash; 2045 a->roworiented = oldmat->roworiented; 2046 a->rowindices = 0; 2047 a->rowvalues = 0; 2048 a->getrowactive = PETSC_FALSE; 2049 a->barray = 0; 2050 a->rstart_bs = oldmat->rstart_bs; 2051 a->rend_bs = oldmat->rend_bs; 2052 a->cstart_bs = oldmat->cstart_bs; 2053 a->cend_bs = oldmat->cend_bs; 2054 2055 /* hash table stuff */ 2056 a->ht = 0; 2057 a->hd = 0; 2058 a->ht_size = 0; 2059 a->ht_flag = oldmat->ht_flag; 2060 a->ht_fact = oldmat->ht_fact; 2061 a->ht_total_ct = 0; 2062 a->ht_insert_ct = 0; 2063 2064 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2065 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2066 a->cowners = a->rowners + a->size + 2; 2067 a->rowners_bs = a->cowners + a->size + 2; 2068 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2069 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2070 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2071 if (oldmat->colmap) { 2072 #if defined (PETSC_USE_CTABLE) 2073 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2074 #else 2075 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2076 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2077 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2078 #endif 2079 } else a->colmap = 0; 2080 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2081 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2082 PetscLogObjectMemory(mat,len*sizeof(int)); 2083 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2084 } else a->garray = 0; 2085 2086 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2087 PetscLogObjectParent(mat,a->lvec); 2088 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2089 2090 PetscLogObjectParent(mat,a->Mvctx); 2091 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2092 PetscLogObjectParent(mat,a->A); 2093 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2094 PetscLogObjectParent(mat,a->B); 2095 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2096 *newmat = mat; 2097 PetscFunctionReturn(0); 2098 } 2099 2100 #include "petscsys.h" 2101 2102 #undef __FUNCT__ 2103 #define __FUNCT__ "MatLoad_MPISBAIJ" 2104 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2105 { 2106 Mat A; 2107 int i,nz,ierr,j,rstart,rend,fd; 2108 PetscScalar *vals,*buf; 2109 MPI_Comm comm = ((PetscObject)viewer)->comm; 2110 MPI_Status status; 2111 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2112 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2113 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2114 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2115 int dcount,kmax,k,nzcount,tmp; 2116 2117 PetscFunctionBegin; 2118 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2119 2120 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2121 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2122 if (!rank) { 2123 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2124 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2125 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2126 if (header[3] < 0) { 2127 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2128 } 2129 } 2130 2131 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2132 M = header[1]; N = header[2]; 2133 2134 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2135 2136 /* 2137 This code adds extra rows to make sure the number of rows is 2138 divisible by the blocksize 2139 */ 2140 Mbs = M/bs; 2141 extra_rows = bs - M + bs*(Mbs); 2142 if (extra_rows == bs) extra_rows = 0; 2143 else Mbs++; 2144 if (extra_rows &&!rank) { 2145 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2146 } 2147 2148 /* determine ownership of all rows */ 2149 mbs = Mbs/size + ((Mbs % size) > rank); 2150 m = mbs*bs; 2151 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2152 browners = rowners + size + 1; 2153 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2154 rowners[0] = 0; 2155 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2156 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2157 rstart = rowners[rank]; 2158 rend = rowners[rank+1]; 2159 2160 /* distribute row lengths to all processors */ 2161 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2162 if (!rank) { 2163 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2164 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2165 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2166 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2167 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2168 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2169 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2170 } else { 2171 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2172 } 2173 2174 if (!rank) { /* procs[0] */ 2175 /* calculate the number of nonzeros on each processor */ 2176 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2177 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2178 for (i=0; i<size; i++) { 2179 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2180 procsnz[i] += rowlengths[j]; 2181 } 2182 } 2183 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2184 2185 /* determine max buffer needed and allocate it */ 2186 maxnz = 0; 2187 for (i=0; i<size; i++) { 2188 maxnz = PetscMax(maxnz,procsnz[i]); 2189 } 2190 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2191 2192 /* read in my part of the matrix column indices */ 2193 nz = procsnz[0]; 2194 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2195 mycols = ibuf; 2196 if (size == 1) nz -= extra_rows; 2197 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2198 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2199 2200 /* read in every ones (except the last) and ship off */ 2201 for (i=1; i<size-1; i++) { 2202 nz = procsnz[i]; 2203 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2204 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2205 } 2206 /* read in the stuff for the last proc */ 2207 if (size != 1) { 2208 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2209 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2210 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2211 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2212 } 2213 ierr = PetscFree(cols);CHKERRQ(ierr); 2214 } else { /* procs[i], i>0 */ 2215 /* determine buffer space needed for message */ 2216 nz = 0; 2217 for (i=0; i<m; i++) { 2218 nz += locrowlens[i]; 2219 } 2220 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2221 mycols = ibuf; 2222 /* receive message of column indices*/ 2223 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2224 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2225 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2226 } 2227 2228 /* loop over local rows, determining number of off diagonal entries */ 2229 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2230 odlens = dlens + (rend-rstart); 2231 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2232 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2233 masked1 = mask + Mbs; 2234 masked2 = masked1 + Mbs; 2235 rowcount = 0; nzcount = 0; 2236 for (i=0; i<mbs; i++) { 2237 dcount = 0; 2238 odcount = 0; 2239 for (j=0; j<bs; j++) { 2240 kmax = locrowlens[rowcount]; 2241 for (k=0; k<kmax; k++) { 2242 tmp = mycols[nzcount++]/bs; /* block col. index */ 2243 if (!mask[tmp]) { 2244 mask[tmp] = 1; 2245 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2246 else masked1[dcount++] = tmp; /* entry in diag portion */ 2247 } 2248 } 2249 rowcount++; 2250 } 2251 2252 dlens[i] = dcount; /* d_nzz[i] */ 2253 odlens[i] = odcount; /* o_nzz[i] */ 2254 2255 /* zero out the mask elements we set */ 2256 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2257 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2258 } 2259 2260 /* create our matrix */ 2261 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2262 ierr = MatSetType(A,type);CHKERRQ(ierr); 2263 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2264 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2265 2266 if (!rank) { 2267 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2268 /* read in my part of the matrix numerical values */ 2269 nz = procsnz[0]; 2270 vals = buf; 2271 mycols = ibuf; 2272 if (size == 1) nz -= extra_rows; 2273 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2274 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2275 2276 /* insert into matrix */ 2277 jj = rstart*bs; 2278 for (i=0; i<m; i++) { 2279 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2280 mycols += locrowlens[i]; 2281 vals += locrowlens[i]; 2282 jj++; 2283 } 2284 2285 /* read in other processors (except the last one) and ship out */ 2286 for (i=1; i<size-1; i++) { 2287 nz = procsnz[i]; 2288 vals = buf; 2289 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2290 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2291 } 2292 /* the last proc */ 2293 if (size != 1){ 2294 nz = procsnz[i] - extra_rows; 2295 vals = buf; 2296 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2297 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2298 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2299 } 2300 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2301 2302 } else { 2303 /* receive numeric values */ 2304 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2305 2306 /* receive message of values*/ 2307 vals = buf; 2308 mycols = ibuf; 2309 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2310 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2311 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2312 2313 /* insert into matrix */ 2314 jj = rstart*bs; 2315 for (i=0; i<m; i++) { 2316 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2317 mycols += locrowlens[i]; 2318 vals += locrowlens[i]; 2319 jj++; 2320 } 2321 } 2322 2323 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2324 ierr = PetscFree(buf);CHKERRQ(ierr); 2325 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2326 ierr = PetscFree(rowners);CHKERRQ(ierr); 2327 ierr = PetscFree(dlens);CHKERRQ(ierr); 2328 ierr = PetscFree(mask);CHKERRQ(ierr); 2329 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2330 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2331 *newmat = A; 2332 PetscFunctionReturn(0); 2333 } 2334 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2337 /*@ 2338 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2339 2340 Input Parameters: 2341 . mat - the matrix 2342 . fact - factor 2343 2344 Collective on Mat 2345 2346 Level: advanced 2347 2348 Notes: 2349 This can also be set by the command line option: -mat_use_hash_table fact 2350 2351 .keywords: matrix, hashtable, factor, HT 2352 2353 .seealso: MatSetOption() 2354 @*/ 2355 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2356 { 2357 PetscFunctionBegin; 2358 SETERRQ(1,"Function not yet written for SBAIJ format"); 2359 /* PetscFunctionReturn(0); */ 2360 } 2361 2362 #undef __FUNCT__ 2363 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2364 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2365 { 2366 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2367 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2368 PetscReal atmp; 2369 PetscReal *work,*svalues,*rvalues; 2370 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2371 int rank,size,*rowners_bs,dest,count,source; 2372 PetscScalar *va; 2373 MatScalar *ba; 2374 MPI_Status stat; 2375 2376 PetscFunctionBegin; 2377 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2378 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2379 2380 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2381 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2382 2383 bs = a->bs; 2384 mbs = a->mbs; 2385 Mbs = a->Mbs; 2386 ba = b->a; 2387 bi = b->i; 2388 bj = b->j; 2389 /* 2390 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2391 PetscSynchronizedFlush(A->comm); 2392 */ 2393 2394 /* find ownerships */ 2395 rowners_bs = a->rowners_bs; 2396 /* 2397 if (!rank){ 2398 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2399 } 2400 */ 2401 2402 /* each proc creates an array to be distributed */ 2403 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2404 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2405 2406 /* row_max for B */ 2407 if (rank != size-1){ 2408 for (i=0; i<mbs; i++) { 2409 ncols = bi[1] - bi[0]; bi++; 2410 brow = bs*i; 2411 for (j=0; j<ncols; j++){ 2412 bcol = bs*(*bj); 2413 for (kcol=0; kcol<bs; kcol++){ 2414 col = bcol + kcol; /* local col index */ 2415 col += rowners_bs[rank+1]; /* global col index */ 2416 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2417 for (krow=0; krow<bs; krow++){ 2418 atmp = PetscAbsScalar(*ba); ba++; 2419 row = brow + krow; /* local row index */ 2420 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2421 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2422 if (work[col] < atmp) work[col] = atmp; 2423 } 2424 } 2425 bj++; 2426 } 2427 } 2428 /* 2429 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2430 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2431 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2432 */ 2433 2434 /* send values to its owners */ 2435 for (dest=rank+1; dest<size; dest++){ 2436 svalues = work + rowners_bs[dest]; 2437 count = rowners_bs[dest+1]-rowners_bs[dest]; 2438 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2439 /* 2440 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2441 PetscSynchronizedFlush(A->comm); 2442 */ 2443 } 2444 } 2445 2446 /* receive values */ 2447 if (rank){ 2448 rvalues = work; 2449 count = rowners_bs[rank+1]-rowners_bs[rank]; 2450 for (source=0; source<rank; source++){ 2451 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2452 /* process values */ 2453 for (i=0; i<count; i++){ 2454 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2455 } 2456 /* 2457 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2458 PetscSynchronizedFlush(A->comm); 2459 */ 2460 } 2461 } 2462 2463 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2464 ierr = PetscFree(work);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatRelax_MPISBAIJ" 2470 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2471 { 2472 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2473 int ierr,mbs=mat->mbs,bs=mat->bs; 2474 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2475 Vec bb1; 2476 2477 PetscFunctionBegin; 2478 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2479 if (bs > 1) 2480 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2481 2482 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2483 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2484 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2485 its--; 2486 } 2487 2488 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2489 while (its--){ 2490 2491 /* lower triangular part: slvec0b = - B^T*xx */ 2492 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2493 2494 /* copy xx into slvec0a */ 2495 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2496 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2497 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2498 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2499 2500 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2501 2502 /* copy bb into slvec1a */ 2503 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2504 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2505 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2506 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2507 2508 /* set slvec1b = 0 */ 2509 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2510 2511 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2512 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2513 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2514 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2515 2516 /* upper triangular part: bb1 = bb1 - B*x */ 2517 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2518 2519 /* local diagonal sweep */ 2520 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2521 } 2522 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2523 } else { 2524 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2525 } 2526 PetscFunctionReturn(0); 2527 } 2528 2529 #undef __FUNCT__ 2530 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2531 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2532 { 2533 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2534 int ierr; 2535 PetscScalar mone=-1.0; 2536 Vec lvec1,bb1; 2537 2538 PetscFunctionBegin; 2539 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2540 if (mat->bs > 1) 2541 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2542 2543 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2544 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2545 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2546 its--; 2547 } 2548 2549 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2550 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2551 while (its--){ 2552 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2553 2554 /* lower diagonal part: bb1 = bb - B^T*xx */ 2555 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2556 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2557 2558 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2559 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2560 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2561 2562 /* upper diagonal part: bb1 = bb1 - B*x */ 2563 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2564 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2565 2566 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2567 2568 /* diagonal sweep */ 2569 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2570 } 2571 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2572 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2573 } else { 2574 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2575 } 2576 PetscFunctionReturn(0); 2577 } 2578 2579