1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 112 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 113 new_j = (int*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 #ifndef MatSetValues_SeqBAIJ_B_Private 155 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 188 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 189 new_j = (int*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 198 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 #endif 231 232 #if defined(PETSC_USE_MAT_SINGLE) 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValues_MPISBAIJ" 235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 238 int ierr,i,N = m*n; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 int ierr,i,N = m*n*b->bs2; 262 MatScalar *vsingle; 263 264 PetscFunctionBegin; 265 if (N > b->setvalueslen) { 266 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 267 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 268 b->setvalueslen = N; 269 } 270 vsingle = b->setvaluescopy; 271 for (i=0; i<N; i++) { 272 vsingle[i] = v[i]; 273 } 274 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 281 { 282 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 283 int ierr,i,N = m*n; 284 MatScalar *vsingle; 285 286 PetscFunctionBegin; 287 SETERRQ(1,"Function not yet written for SBAIJ format"); 288 /* PetscFunctionReturn(0); */ 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 294 { 295 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 296 int ierr,i,N = m*n*b->bs2; 297 MatScalar *vsingle; 298 299 PetscFunctionBegin; 300 SETERRQ(1,"Function not yet written for SBAIJ format"); 301 /* PetscFunctionReturn(0); */ 302 } 303 #endif 304 305 /* Only add/insert a(i,j) with i<=j (blocks). 306 Any a(i,j) with i>j input by user is ingored. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 311 { 312 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 313 MatScalar value; 314 PetscTruth roworiented = baij->roworiented; 315 int ierr,i,j,row,col; 316 int rstart_orig=baij->rstart_bs; 317 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 318 int cend_orig=baij->cend_bs,bs=baij->bs; 319 320 /* Some Variables required in the macro */ 321 Mat A = baij->A; 322 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 323 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 324 MatScalar *aa=a->a; 325 326 Mat B = baij->B; 327 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 328 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 329 MatScalar *ba=b->a; 330 331 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 332 int low,high,t,ridx,cidx,bs2=a->bs2; 333 MatScalar *ap,*bap; 334 335 /* for stash */ 336 int n_loc, *in_loc=0; 337 MatScalar *v_loc=0; 338 339 PetscFunctionBegin; 340 341 if(!baij->donotstash){ 342 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 343 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 344 } 345 346 for (i=0; i<m; i++) { 347 if (im[i] < 0) continue; 348 #if defined(PETSC_USE_BOPT_g) 349 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 350 #endif 351 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 352 row = im[i] - rstart_orig; /* local row index */ 353 for (j=0; j<n; j++) { 354 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 355 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 356 col = in[j] - cstart_orig; /* local col index */ 357 brow = row/bs; bcol = col/bs; 358 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 359 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 360 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 361 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 362 } else if (in[j] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 365 #endif 366 else { /* off-diag entry (B) */ 367 if (mat->was_assembled) { 368 if (!baij->colmap) { 369 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 370 } 371 #if defined (PETSC_USE_CTABLE) 372 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 373 col = col - 1; 374 #else 375 col = baij->colmap[in[j]/bs] - 1; 376 #endif 377 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 378 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 379 col = in[j]; 380 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 381 B = baij->B; 382 b = (Mat_SeqBAIJ*)(B)->data; 383 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 384 ba=b->a; 385 } else col += in[j]%bs; 386 } else col = in[j]; 387 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 388 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 389 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 390 } 391 } 392 } else { /* off processor entry */ 393 if (!baij->donotstash) { 394 n_loc = 0; 395 for (j=0; j<n; j++){ 396 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 397 in_loc[n_loc] = in[j]; 398 if (roworiented) { 399 v_loc[n_loc] = v[i*n+j]; 400 } else { 401 v_loc[n_loc] = v[j*m+i]; 402 } 403 n_loc++; 404 } 405 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 406 } 407 } 408 } 409 410 if(!baij->donotstash){ 411 ierr = PetscFree(in_loc);CHKERRQ(ierr); 412 ierr = PetscFree(v_loc);CHKERRQ(ierr); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 420 { 421 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 422 const MatScalar *value; 423 MatScalar *barray=baij->barray; 424 PetscTruth roworiented = baij->roworiented; 425 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 426 int rend=baij->rend,cstart=baij->cstart,stepval; 427 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 428 429 PetscFunctionBegin; 430 if(!barray) { 431 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 432 baij->barray = barray; 433 } 434 435 if (roworiented) { 436 stepval = (n-1)*bs; 437 } else { 438 stepval = (m-1)*bs; 439 } 440 for (i=0; i<m; i++) { 441 if (im[i] < 0) continue; 442 #if defined(PETSC_USE_BOPT_g) 443 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 444 #endif 445 if (im[i] >= rstart && im[i] < rend) { 446 row = im[i] - rstart; 447 for (j=0; j<n; j++) { 448 /* If NumCol = 1 then a copy is not required */ 449 if ((roworiented) && (n == 1)) { 450 barray = (MatScalar*) v + i*bs2; 451 } else if((!roworiented) && (m == 1)) { 452 barray = (MatScalar*) v + j*bs2; 453 } else { /* Here a copy is required */ 454 if (roworiented) { 455 value = v + i*(stepval+bs)*bs + j*bs; 456 } else { 457 value = v + j*(stepval+bs)*bs + i*bs; 458 } 459 for (ii=0; ii<bs; ii++,value+=stepval) { 460 for (jj=0; jj<bs; jj++) { 461 *barray++ = *value++; 462 } 463 } 464 barray -=bs2; 465 } 466 467 if (in[j] >= cstart && in[j] < cend){ 468 col = in[j] - cstart; 469 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 470 } 471 else if (in[j] < 0) continue; 472 #if defined(PETSC_USE_BOPT_g) 473 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 474 #endif 475 else { 476 if (mat->was_assembled) { 477 if (!baij->colmap) { 478 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 479 } 480 481 #if defined(PETSC_USE_BOPT_g) 482 #if defined (PETSC_USE_CTABLE) 483 { int data; 484 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 485 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 486 } 487 #else 488 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 489 #endif 490 #endif 491 #if defined (PETSC_USE_CTABLE) 492 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 493 col = (col - 1)/bs; 494 #else 495 col = (baij->colmap[in[j]] - 1)/bs; 496 #endif 497 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 498 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 499 col = in[j]; 500 } 501 } 502 else col = in[j]; 503 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 504 } 505 } 506 } else { 507 if (!baij->donotstash) { 508 if (roworiented) { 509 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 510 } else { 511 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 512 } 513 } 514 } 515 } 516 PetscFunctionReturn(0); 517 } 518 519 #define HASH_KEY 0.6180339887 520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 526 { 527 PetscFunctionBegin; 528 SETERRQ(1,"Function not yet written for SBAIJ format"); 529 /* PetscFunctionReturn(0); */ 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 535 { 536 PetscFunctionBegin; 537 SETERRQ(1,"Function not yet written for SBAIJ format"); 538 /* PetscFunctionReturn(0); */ 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatGetValues_MPISBAIJ" 543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 544 { 545 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 546 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 547 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 552 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 553 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 554 row = idxm[i] - bsrstart; 555 for (j=0; j<n; j++) { 556 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 557 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 558 if (idxn[j] >= bscstart && idxn[j] < bscend){ 559 col = idxn[j] - bscstart; 560 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 561 } else { 562 if (!baij->colmap) { 563 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 564 } 565 #if defined (PETSC_USE_CTABLE) 566 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 567 data --; 568 #else 569 data = baij->colmap[idxn[j]/bs]-1; 570 #endif 571 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 572 else { 573 col = data + idxn[j]%bs; 574 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575 } 576 } 577 } 578 } else { 579 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatNorm_MPISBAIJ" 587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 588 { 589 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 590 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 591 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 592 int ierr; 593 PetscReal sum[2],*lnorm2; 594 595 PetscFunctionBegin; 596 if (baij->size == 1) { 597 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 598 } else { 599 if (type == NORM_FROBENIUS) { 600 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 601 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 602 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 603 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 605 /* 606 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 607 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 608 */ 609 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 610 /* 611 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 612 PetscSynchronizedFlush(mat->comm); */ 613 614 *norm = sqrt(sum[0] + 2*sum[1]); 615 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 /* 624 Creates the hash table, and sets the table 625 This table is created only once. 626 If new entried need to be added to the matrix 627 then the hash table has to be destroyed and 628 recreated. 629 */ 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 633 { 634 PetscFunctionBegin; 635 SETERRQ(1,"Function not yet written for SBAIJ format"); 636 /* PetscFunctionReturn(0); */ 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 642 { 643 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 644 int ierr,nstash,reallocs; 645 InsertMode addv; 646 647 PetscFunctionBegin; 648 if (baij->donotstash) { 649 PetscFunctionReturn(0); 650 } 651 652 /* make sure all processors are either in INSERTMODE or ADDMODE */ 653 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 654 if (addv == (ADD_VALUES|INSERT_VALUES)) { 655 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 656 } 657 mat->insertmode = addv; /* in case this processor had no cache */ 658 659 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 673 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 674 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 675 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 676 int *row,*col,other_disassembled; 677 PetscTruth r1,r2,r3; 678 MatScalar *val; 679 InsertMode addv = mat->insertmode; 680 681 PetscFunctionBegin; 682 683 if (!baij->donotstash) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 /* 687 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 688 PetscSynchronizedFlush(mat->comm); 689 */ 690 if (!flg) break; 691 692 for (i=0; i<n;) { 693 /* Now identify the consecutive vals belonging to the same row */ 694 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 695 if (j < n) ncols = j-i; 696 else ncols = n-i; 697 /* Now assemble all these values with a single function call */ 698 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 699 i = j; 700 } 701 } 702 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 703 /* Now process the block-stash. Since the values are stashed column-oriented, 704 set the roworiented flag to column oriented, and after MatSetValues() 705 restore the original flags */ 706 r1 = baij->roworiented; 707 r2 = a->roworiented; 708 r3 = b->roworiented; 709 baij->roworiented = PETSC_FALSE; 710 a->roworiented = PETSC_FALSE; 711 b->roworiented = PETSC_FALSE; 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n;) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 719 if (j < n) ncols = j-i; 720 else ncols = n-i; 721 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 722 i = j; 723 } 724 } 725 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 726 baij->roworiented = r1; 727 a->roworiented = r2; 728 b->roworiented = r3; 729 } 730 731 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 741 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 747 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 748 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 749 } 750 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 752 753 #if defined(PETSC_USE_BOPT_g) 754 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 755 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 756 baij->ht_total_ct = 0; 757 baij->ht_insert_ct = 0; 758 } 759 #endif 760 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 761 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 762 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 763 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 764 } 765 766 if (baij->rowvalues) { 767 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 768 baij->rowvalues = 0; 769 } 770 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 779 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 780 PetscTruth isascii,isdraw; 781 PetscViewer sviewer; 782 PetscViewerFormat format; 783 784 PetscFunctionBegin; 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 787 if (isascii) { 788 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 789 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 790 MatInfo info; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 794 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 795 baij->bs,(int)info.memory);CHKERRQ(ierr); 796 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 797 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 798 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 799 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 800 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 801 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } else if (format == PETSC_VIEWER_ASCII_INFO) { 804 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 } 808 809 if (isdraw) { 810 PetscDraw draw; 811 PetscTruth isnull; 812 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 813 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 814 } 815 816 if (size == 1) { 817 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 818 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 819 } else { 820 /* assemble the entire matrix onto first processor. */ 821 Mat A; 822 Mat_SeqSBAIJ *Aloc; 823 Mat_SeqBAIJ *Bloc; 824 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 825 MatScalar *a; 826 827 if (!rank) { 828 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 829 } else { 830 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 831 } 832 PetscLogObjectParent(mat,A); 833 834 /* copy over the A part */ 835 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 836 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 837 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 838 839 for (i=0; i<mbs; i++) { 840 rvals[0] = bs*(baij->rstart + i); 841 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 842 for (j=ai[i]; j<ai[i+1]; j++) { 843 col = (baij->cstart+aj[j])*bs; 844 for (k=0; k<bs; k++) { 845 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 846 col++; a += bs; 847 } 848 } 849 } 850 /* copy over the B part */ 851 Bloc = (Mat_SeqBAIJ*)baij->B->data; 852 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 853 for (i=0; i<mbs; i++) { 854 rvals[0] = bs*(baij->rstart + i); 855 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 856 for (j=ai[i]; j<ai[i+1]; j++) { 857 col = baij->garray[aj[j]]*bs; 858 for (k=0; k<bs; k++) { 859 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 860 col++; a += bs; 861 } 862 } 863 } 864 ierr = PetscFree(rvals);CHKERRQ(ierr); 865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 867 /* 868 Everyone has to call to draw the matrix since the graphics waits are 869 synchronized across all processors that share the PetscDraw object 870 */ 871 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 872 if (!rank) { 873 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 874 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 875 } 876 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 877 ierr = MatDestroy(A);CHKERRQ(ierr); 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatView_MPISBAIJ" 884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 885 { 886 int ierr; 887 PetscTruth isascii,isdraw,issocket,isbinary; 888 889 PetscFunctionBegin; 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 894 if (isascii || isdraw || issocket || isbinary) { 895 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 896 } else { 897 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatDestroy_MPISBAIJ" 904 int MatDestroy_MPISBAIJ(Mat mat) 905 { 906 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 907 int ierr; 908 909 PetscFunctionBegin; 910 #if defined(PETSC_USE_LOG) 911 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 912 #endif 913 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 914 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 915 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 916 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 917 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 918 #if defined (PETSC_USE_CTABLE) 919 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 920 #else 921 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 922 #endif 923 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 924 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 925 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 926 if (baij->slvec0) { 927 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 928 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 929 } 930 if (baij->slvec1) { 931 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 932 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 933 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 934 } 935 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 936 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 937 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 938 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 939 #if defined(PETSC_USE_MAT_SINGLE) 940 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 941 #endif 942 ierr = PetscFree(baij);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatMult_MPISBAIJ" 948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 949 { 950 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 951 int ierr,nt,mbs=a->mbs,bs=a->bs; 952 PetscScalar *x,*from,zero=0.0; 953 954 PetscFunctionBegin; 955 /* 956 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 957 PetscSynchronizedFlush(A->comm); 958 */ 959 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 960 if (nt != A->n) { 961 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 962 } 963 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 964 if (nt != A->m) { 965 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 966 } 967 968 /* diagonal part */ 969 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 970 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 971 972 /* subdiagonal part */ 973 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 974 975 /* copy x into the vec slvec0 */ 976 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 977 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 978 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 979 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 980 981 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 982 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 984 985 /* supperdiagonal part */ 986 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 987 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 993 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 994 { 995 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 996 int ierr,nt; 997 998 PetscFunctionBegin; 999 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1000 if (nt != A->n) { 1001 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1002 } 1003 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1004 if (nt != A->m) { 1005 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1006 } 1007 1008 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1009 /* do diagonal part */ 1010 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1011 /* do supperdiagonal part */ 1012 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1013 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1014 /* do subdiagonal part */ 1015 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1016 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1017 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1018 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1024 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1025 { 1026 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1027 int ierr,mbs=a->mbs,bs=a->bs; 1028 PetscScalar *x,*from,zero=0.0; 1029 1030 PetscFunctionBegin; 1031 /* 1032 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1033 PetscSynchronizedFlush(A->comm); 1034 */ 1035 /* diagonal part */ 1036 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1037 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1038 1039 /* subdiagonal part */ 1040 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1041 1042 /* copy x into the vec slvec0 */ 1043 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1046 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1047 1048 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1049 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1051 1052 /* supperdiagonal part */ 1053 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1054 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1060 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1061 { 1062 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1063 int ierr; 1064 1065 PetscFunctionBegin; 1066 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1067 /* do diagonal part */ 1068 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1069 /* do supperdiagonal part */ 1070 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1071 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1072 1073 /* do subdiagonal part */ 1074 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1075 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1077 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1083 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1084 { 1085 int ierr; 1086 1087 PetscFunctionBegin; 1088 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1094 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1095 { 1096 int ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 /* 1104 This only works correctly for square matrices where the subblock A->A is the 1105 diagonal block 1106 */ 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1109 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1110 { 1111 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1112 int ierr; 1113 1114 PetscFunctionBegin; 1115 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1116 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatScale_MPISBAIJ" 1122 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1123 { 1124 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1125 int ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1129 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1135 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1136 { 1137 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1138 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1139 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1140 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1141 int *cmap,*idx_p,cstart = mat->cstart; 1142 1143 PetscFunctionBegin; 1144 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1145 mat->getrowactive = PETSC_TRUE; 1146 1147 if (!mat->rowvalues && (idx || v)) { 1148 /* 1149 allocate enough space to hold information from the longest row. 1150 */ 1151 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1152 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1153 int max = 1,mbs = mat->mbs,tmp; 1154 for (i=0; i<mbs; i++) { 1155 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1156 if (max < tmp) { max = tmp; } 1157 } 1158 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1159 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1160 } 1161 1162 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1163 lrow = row - brstart; /* local row index */ 1164 1165 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1166 if (!v) {pvA = 0; pvB = 0;} 1167 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1168 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1169 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1170 nztot = nzA + nzB; 1171 1172 cmap = mat->garray; 1173 if (v || idx) { 1174 if (nztot) { 1175 /* Sort by increasing column numbers, assuming A and B already sorted */ 1176 int imark = -1; 1177 if (v) { 1178 *v = v_p = mat->rowvalues; 1179 for (i=0; i<nzB; i++) { 1180 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1181 else break; 1182 } 1183 imark = i; 1184 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1185 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1186 } 1187 if (idx) { 1188 *idx = idx_p = mat->rowindices; 1189 if (imark > -1) { 1190 for (i=0; i<imark; i++) { 1191 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1192 } 1193 } else { 1194 for (i=0; i<nzB; i++) { 1195 if (cmap[cworkB[i]/bs] < cstart) 1196 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1197 else break; 1198 } 1199 imark = i; 1200 } 1201 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1202 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1203 } 1204 } else { 1205 if (idx) *idx = 0; 1206 if (v) *v = 0; 1207 } 1208 } 1209 *nz = nztot; 1210 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1211 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1217 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1218 { 1219 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1220 1221 PetscFunctionBegin; 1222 if (baij->getrowactive == PETSC_FALSE) { 1223 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1224 } 1225 baij->getrowactive = PETSC_FALSE; 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1231 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1232 { 1233 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1234 1235 PetscFunctionBegin; 1236 *bs = baij->bs; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1242 int MatZeroEntries_MPISBAIJ(Mat A) 1243 { 1244 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1245 int ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1249 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1255 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1256 { 1257 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1258 Mat A = a->A,B = a->B; 1259 int ierr; 1260 PetscReal isend[5],irecv[5]; 1261 1262 PetscFunctionBegin; 1263 info->block_size = (PetscReal)a->bs; 1264 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1265 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1266 isend[3] = info->memory; isend[4] = info->mallocs; 1267 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1268 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1269 isend[3] += info->memory; isend[4] += info->mallocs; 1270 if (flag == MAT_LOCAL) { 1271 info->nz_used = isend[0]; 1272 info->nz_allocated = isend[1]; 1273 info->nz_unneeded = isend[2]; 1274 info->memory = isend[3]; 1275 info->mallocs = isend[4]; 1276 } else if (flag == MAT_GLOBAL_MAX) { 1277 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1278 info->nz_used = irecv[0]; 1279 info->nz_allocated = irecv[1]; 1280 info->nz_unneeded = irecv[2]; 1281 info->memory = irecv[3]; 1282 info->mallocs = irecv[4]; 1283 } else if (flag == MAT_GLOBAL_SUM) { 1284 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1285 info->nz_used = irecv[0]; 1286 info->nz_allocated = irecv[1]; 1287 info->nz_unneeded = irecv[2]; 1288 info->memory = irecv[3]; 1289 info->mallocs = irecv[4]; 1290 } else { 1291 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1292 } 1293 info->rows_global = (PetscReal)A->M; 1294 info->columns_global = (PetscReal)A->N; 1295 info->rows_local = (PetscReal)A->m; 1296 info->columns_local = (PetscReal)A->N; 1297 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1298 info->fill_ratio_needed = 0; 1299 info->factor_mallocs = 0; 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1305 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1306 { 1307 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1308 int ierr; 1309 1310 PetscFunctionBegin; 1311 switch (op) { 1312 case MAT_NO_NEW_NONZERO_LOCATIONS: 1313 case MAT_YES_NEW_NONZERO_LOCATIONS: 1314 case MAT_COLUMNS_UNSORTED: 1315 case MAT_COLUMNS_SORTED: 1316 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1317 case MAT_KEEP_ZEROED_ROWS: 1318 case MAT_NEW_NONZERO_LOCATION_ERR: 1319 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1320 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1321 break; 1322 case MAT_ROW_ORIENTED: 1323 a->roworiented = PETSC_TRUE; 1324 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1325 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1326 break; 1327 case MAT_ROWS_SORTED: 1328 case MAT_ROWS_UNSORTED: 1329 case MAT_YES_NEW_DIAGONALS: 1330 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1331 break; 1332 case MAT_COLUMN_ORIENTED: 1333 a->roworiented = PETSC_FALSE; 1334 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1335 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1336 break; 1337 case MAT_IGNORE_OFF_PROC_ENTRIES: 1338 a->donotstash = PETSC_TRUE; 1339 break; 1340 case MAT_NO_NEW_DIAGONALS: 1341 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1342 case MAT_USE_HASH_TABLE: 1343 a->ht_flag = PETSC_TRUE; 1344 break; 1345 case MAT_NOT_SYMMETRIC: 1346 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1347 case MAT_HERMITIAN: 1348 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1349 case MAT_SYMMETRIC: 1350 case MAT_STRUCTURALLY_SYMMETRIC: 1351 case MAT_NOT_HERMITIAN: 1352 case MAT_SYMMETRY_ETERNAL: 1353 case MAT_NOT_SYMMETRY_ETERNAL: 1354 break; 1355 default: 1356 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1357 } 1358 PetscFunctionReturn(0); 1359 } 1360 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1363 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1364 { 1365 int ierr; 1366 PetscFunctionBegin; 1367 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1373 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1374 { 1375 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1376 Mat a = baij->A,b = baij->B; 1377 int ierr,s1,s2,s3; 1378 1379 PetscFunctionBegin; 1380 if (ll != rr) { 1381 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1382 } 1383 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1384 if (rr) { 1385 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1386 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1387 /* Overlap communication with computation. */ 1388 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1389 /*} if (ll) { */ 1390 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1391 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1392 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1393 /* } */ 1394 /* scale the diagonal block */ 1395 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1396 1397 /* if (rr) { */ 1398 /* Do a scatter end and then right scale the off-diagonal block */ 1399 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1400 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1401 } 1402 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1408 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1409 { 1410 PetscFunctionBegin; 1411 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1416 int MatPrintHelp_MPISBAIJ(Mat A) 1417 { 1418 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1419 MPI_Comm comm = A->comm; 1420 static int called = 0; 1421 int ierr; 1422 1423 PetscFunctionBegin; 1424 if (!a->rank) { 1425 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1426 } 1427 if (called) {PetscFunctionReturn(0);} else called = 1; 1428 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1429 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1435 int MatSetUnfactored_MPISBAIJ(Mat A) 1436 { 1437 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatEqual_MPISBAIJ" 1449 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1450 { 1451 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1452 Mat a,b,c,d; 1453 PetscTruth flg; 1454 int ierr; 1455 1456 PetscFunctionBegin; 1457 a = matA->A; b = matA->B; 1458 c = matB->A; d = matB->B; 1459 1460 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1461 if (flg == PETSC_TRUE) { 1462 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1463 } 1464 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1470 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1471 { 1472 int ierr; 1473 1474 PetscFunctionBegin; 1475 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1481 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1482 { 1483 int i,ierr; 1484 PetscTruth flg; 1485 1486 for (i=0; i<n; i++) { 1487 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1488 if (!flg) { 1489 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1490 } 1491 } 1492 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 1497 /* -------------------------------------------------------------------*/ 1498 static struct _MatOps MatOps_Values = { 1499 MatSetValues_MPISBAIJ, 1500 MatGetRow_MPISBAIJ, 1501 MatRestoreRow_MPISBAIJ, 1502 MatMult_MPISBAIJ, 1503 /* 4*/ MatMultAdd_MPISBAIJ, 1504 MatMultTranspose_MPISBAIJ, 1505 MatMultTransposeAdd_MPISBAIJ, 1506 0, 1507 0, 1508 0, 1509 /*10*/ 0, 1510 0, 1511 0, 1512 MatRelax_MPISBAIJ, 1513 MatTranspose_MPISBAIJ, 1514 /*15*/ MatGetInfo_MPISBAIJ, 1515 MatEqual_MPISBAIJ, 1516 MatGetDiagonal_MPISBAIJ, 1517 MatDiagonalScale_MPISBAIJ, 1518 MatNorm_MPISBAIJ, 1519 /*20*/ MatAssemblyBegin_MPISBAIJ, 1520 MatAssemblyEnd_MPISBAIJ, 1521 0, 1522 MatSetOption_MPISBAIJ, 1523 MatZeroEntries_MPISBAIJ, 1524 /*25*/ MatZeroRows_MPISBAIJ, 1525 0, 1526 0, 1527 0, 1528 0, 1529 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1530 0, 1531 0, 1532 0, 1533 0, 1534 /*35*/ MatDuplicate_MPISBAIJ, 1535 0, 1536 0, 1537 0, 1538 0, 1539 /*40*/ 0, 1540 MatGetSubMatrices_MPISBAIJ, 1541 0, 1542 MatGetValues_MPISBAIJ, 1543 0, 1544 /*45*/ MatPrintHelp_MPISBAIJ, 1545 MatScale_MPISBAIJ, 1546 0, 1547 0, 1548 0, 1549 /*50*/ MatGetBlockSize_MPISBAIJ, 1550 0, 1551 0, 1552 0, 1553 0, 1554 /*55*/ 0, 1555 0, 1556 MatSetUnfactored_MPISBAIJ, 1557 0, 1558 MatSetValuesBlocked_MPISBAIJ, 1559 /*60*/ 0, 1560 0, 1561 0, 1562 MatGetPetscMaps_Petsc, 1563 0, 1564 /*65*/ 0, 1565 0, 1566 0, 1567 0, 1568 0, 1569 /*70*/ MatGetRowMax_MPISBAIJ, 1570 0, 1571 0, 1572 0, 1573 0, 1574 /*75*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*80*/ 0, 1580 0, 1581 0, 1582 0, 1583 /*85*/ MatLoad_MPISBAIJ 1584 }; 1585 1586 1587 EXTERN_C_BEGIN 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1590 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1591 { 1592 PetscFunctionBegin; 1593 *a = ((Mat_MPISBAIJ *)A->data)->A; 1594 *iscopy = PETSC_FALSE; 1595 PetscFunctionReturn(0); 1596 } 1597 EXTERN_C_END 1598 1599 EXTERN_C_BEGIN 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1602 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1603 { 1604 Mat_MPISBAIJ *b; 1605 int ierr,i,mbs,Mbs; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1609 1610 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1611 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1612 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1613 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1614 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1615 if (d_nnz) { 1616 for (i=0; i<B->m/bs; i++) { 1617 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1618 } 1619 } 1620 if (o_nnz) { 1621 for (i=0; i<B->m/bs; i++) { 1622 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1623 } 1624 } 1625 B->preallocated = PETSC_TRUE; 1626 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1627 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1628 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1629 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1630 1631 b = (Mat_MPISBAIJ*)B->data; 1632 mbs = B->m/bs; 1633 Mbs = B->M/bs; 1634 if (mbs*bs != B->m) { 1635 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1636 } 1637 1638 b->bs = bs; 1639 b->bs2 = bs*bs; 1640 b->mbs = mbs; 1641 b->nbs = mbs; 1642 b->Mbs = Mbs; 1643 b->Nbs = Mbs; 1644 1645 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1646 b->rowners[0] = 0; 1647 for (i=2; i<=b->size; i++) { 1648 b->rowners[i] += b->rowners[i-1]; 1649 } 1650 b->rstart = b->rowners[b->rank]; 1651 b->rend = b->rowners[b->rank+1]; 1652 b->cstart = b->rstart; 1653 b->cend = b->rend; 1654 for (i=0; i<=b->size; i++) { 1655 b->rowners_bs[i] = b->rowners[i]*bs; 1656 } 1657 b->rstart_bs = b-> rstart*bs; 1658 b->rend_bs = b->rend*bs; 1659 1660 b->cstart_bs = b->cstart*bs; 1661 b->cend_bs = b->cend*bs; 1662 1663 1664 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1665 PetscLogObjectParent(B,b->A); 1666 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1667 PetscLogObjectParent(B,b->B); 1668 1669 /* build cache for off array entries formed */ 1670 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1671 1672 PetscFunctionReturn(0); 1673 } 1674 EXTERN_C_END 1675 1676 /*MC 1677 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1678 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1679 1680 Options Database Keys: 1681 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1682 1683 Level: beginner 1684 1685 .seealso: MatCreateMPISBAIJ 1686 M*/ 1687 1688 EXTERN_C_BEGIN 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatCreate_MPISBAIJ" 1691 int MatCreate_MPISBAIJ(Mat B) 1692 { 1693 Mat_MPISBAIJ *b; 1694 int ierr; 1695 PetscTruth flg; 1696 1697 PetscFunctionBegin; 1698 1699 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1700 B->data = (void*)b; 1701 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1702 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1703 1704 B->ops->destroy = MatDestroy_MPISBAIJ; 1705 B->ops->view = MatView_MPISBAIJ; 1706 B->mapping = 0; 1707 B->factor = 0; 1708 B->assembled = PETSC_FALSE; 1709 1710 B->insertmode = NOT_SET_VALUES; 1711 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1712 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1713 1714 /* build local table of row and column ownerships */ 1715 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1716 b->cowners = b->rowners + b->size + 2; 1717 b->rowners_bs = b->cowners + b->size + 2; 1718 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1719 1720 /* build cache for off array entries formed */ 1721 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1722 b->donotstash = PETSC_FALSE; 1723 b->colmap = PETSC_NULL; 1724 b->garray = PETSC_NULL; 1725 b->roworiented = PETSC_TRUE; 1726 1727 #if defined(PETSC_USE_MAT_SINGLE) 1728 /* stuff for MatSetValues_XXX in single precision */ 1729 b->setvalueslen = 0; 1730 b->setvaluescopy = PETSC_NULL; 1731 #endif 1732 1733 /* stuff used in block assembly */ 1734 b->barray = 0; 1735 1736 /* stuff used for matrix vector multiply */ 1737 b->lvec = 0; 1738 b->Mvctx = 0; 1739 b->slvec0 = 0; 1740 b->slvec0b = 0; 1741 b->slvec1 = 0; 1742 b->slvec1a = 0; 1743 b->slvec1b = 0; 1744 b->sMvctx = 0; 1745 1746 /* stuff for MatGetRow() */ 1747 b->rowindices = 0; 1748 b->rowvalues = 0; 1749 b->getrowactive = PETSC_FALSE; 1750 1751 /* hash table stuff */ 1752 b->ht = 0; 1753 b->hd = 0; 1754 b->ht_size = 0; 1755 b->ht_flag = PETSC_FALSE; 1756 b->ht_fact = 0; 1757 b->ht_total_ct = 0; 1758 b->ht_insert_ct = 0; 1759 1760 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1761 if (flg) { 1762 PetscReal fact = 1.39; 1763 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1764 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1765 if (fact <= 1.0) fact = 1.39; 1766 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1767 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1768 } 1769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1770 "MatStoreValues_MPISBAIJ", 1771 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1773 "MatRetrieveValues_MPISBAIJ", 1774 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1776 "MatGetDiagonalBlock_MPISBAIJ", 1777 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1779 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1780 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1781 PetscFunctionReturn(0); 1782 } 1783 EXTERN_C_END 1784 1785 /*MC 1786 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1787 1788 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1789 and MATMPISBAIJ otherwise. 1790 1791 Options Database Keys: 1792 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1793 1794 Level: beginner 1795 1796 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1797 M*/ 1798 1799 EXTERN_C_BEGIN 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatCreate_SBAIJ" 1802 int MatCreate_SBAIJ(Mat A) { 1803 int ierr,size; 1804 1805 PetscFunctionBegin; 1806 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1807 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1808 if (size == 1) { 1809 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1810 } else { 1811 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1812 } 1813 PetscFunctionReturn(0); 1814 } 1815 EXTERN_C_END 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1819 /*@C 1820 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1821 the user should preallocate the matrix storage by setting the parameters 1822 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1823 performance can be increased by more than a factor of 50. 1824 1825 Collective on Mat 1826 1827 Input Parameters: 1828 + A - the matrix 1829 . bs - size of blockk 1830 . d_nz - number of block nonzeros per block row in diagonal portion of local 1831 submatrix (same for all local rows) 1832 . d_nnz - array containing the number of block nonzeros in the various block rows 1833 in the upper triangular and diagonal part of the in diagonal portion of the local 1834 (possibly different for each block row) or PETSC_NULL. You must leave room 1835 for the diagonal entry even if it is zero. 1836 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1837 submatrix (same for all local rows). 1838 - o_nnz - array containing the number of nonzeros in the various block rows of the 1839 off-diagonal portion of the local submatrix (possibly different for 1840 each block row) or PETSC_NULL. 1841 1842 1843 Options Database Keys: 1844 . -mat_no_unroll - uses code that does not unroll the loops in the 1845 block calculations (much slower) 1846 . -mat_block_size - size of the blocks to use 1847 1848 Notes: 1849 1850 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1851 than it must be used on all processors that share the object for that argument. 1852 1853 Storage Information: 1854 For a square global matrix we define each processor's diagonal portion 1855 to be its local rows and the corresponding columns (a square submatrix); 1856 each processor's off-diagonal portion encompasses the remainder of the 1857 local matrix (a rectangular submatrix). 1858 1859 The user can specify preallocated storage for the diagonal part of 1860 the local submatrix with either d_nz or d_nnz (not both). Set 1861 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1862 memory allocation. Likewise, specify preallocated storage for the 1863 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1864 1865 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1866 the figure below we depict these three local rows and all columns (0-11). 1867 1868 .vb 1869 0 1 2 3 4 5 6 7 8 9 10 11 1870 ------------------- 1871 row 3 | o o o d d d o o o o o o 1872 row 4 | o o o d d d o o o o o o 1873 row 5 | o o o d d d o o o o o o 1874 ------------------- 1875 .ve 1876 1877 Thus, any entries in the d locations are stored in the d (diagonal) 1878 submatrix, and any entries in the o locations are stored in the 1879 o (off-diagonal) submatrix. Note that the d matrix is stored in 1880 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1881 1882 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1883 plus the diagonal part of the d matrix, 1884 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1885 In general, for PDE problems in which most nonzeros are near the diagonal, 1886 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1887 or you will get TERRIBLE performance; see the users' manual chapter on 1888 matrices. 1889 1890 Level: intermediate 1891 1892 .keywords: matrix, block, aij, compressed row, sparse, parallel 1893 1894 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1895 @*/ 1896 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1897 { 1898 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1899 1900 PetscFunctionBegin; 1901 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1902 if (f) { 1903 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatCreateMPISBAIJ" 1910 /*@C 1911 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1912 (block compressed row). For good matrix assembly performance 1913 the user should preallocate the matrix storage by setting the parameters 1914 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1915 performance can be increased by more than a factor of 50. 1916 1917 Collective on MPI_Comm 1918 1919 Input Parameters: 1920 + comm - MPI communicator 1921 . bs - size of blockk 1922 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1923 This value should be the same as the local size used in creating the 1924 y vector for the matrix-vector product y = Ax. 1925 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1926 This value should be the same as the local size used in creating the 1927 x vector for the matrix-vector product y = Ax. 1928 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1929 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1930 . d_nz - number of block nonzeros per block row in diagonal portion of local 1931 submatrix (same for all local rows) 1932 . d_nnz - array containing the number of block nonzeros in the various block rows 1933 in the upper triangular portion of the in diagonal portion of the local 1934 (possibly different for each block block row) or PETSC_NULL. 1935 You must leave room for the diagonal entry even if it is zero. 1936 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1937 submatrix (same for all local rows). 1938 - o_nnz - array containing the number of nonzeros in the various block rows of the 1939 off-diagonal portion of the local submatrix (possibly different for 1940 each block row) or PETSC_NULL. 1941 1942 Output Parameter: 1943 . A - the matrix 1944 1945 Options Database Keys: 1946 . -mat_no_unroll - uses code that does not unroll the loops in the 1947 block calculations (much slower) 1948 . -mat_block_size - size of the blocks to use 1949 . -mat_mpi - use the parallel matrix data structures even on one processor 1950 (defaults to using SeqBAIJ format on one processor) 1951 1952 Notes: 1953 The user MUST specify either the local or global matrix dimensions 1954 (possibly both). 1955 1956 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1957 than it must be used on all processors that share the object for that argument. 1958 1959 Storage Information: 1960 For a square global matrix we define each processor's diagonal portion 1961 to be its local rows and the corresponding columns (a square submatrix); 1962 each processor's off-diagonal portion encompasses the remainder of the 1963 local matrix (a rectangular submatrix). 1964 1965 The user can specify preallocated storage for the diagonal part of 1966 the local submatrix with either d_nz or d_nnz (not both). Set 1967 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1968 memory allocation. Likewise, specify preallocated storage for the 1969 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1970 1971 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1972 the figure below we depict these three local rows and all columns (0-11). 1973 1974 .vb 1975 0 1 2 3 4 5 6 7 8 9 10 11 1976 ------------------- 1977 row 3 | o o o d d d o o o o o o 1978 row 4 | o o o d d d o o o o o o 1979 row 5 | o o o d d d o o o o o o 1980 ------------------- 1981 .ve 1982 1983 Thus, any entries in the d locations are stored in the d (diagonal) 1984 submatrix, and any entries in the o locations are stored in the 1985 o (off-diagonal) submatrix. Note that the d matrix is stored in 1986 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1987 1988 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1989 plus the diagonal part of the d matrix, 1990 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1991 In general, for PDE problems in which most nonzeros are near the diagonal, 1992 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1993 or you will get TERRIBLE performance; see the users' manual chapter on 1994 matrices. 1995 1996 Level: intermediate 1997 1998 .keywords: matrix, block, aij, compressed row, sparse, parallel 1999 2000 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2001 @*/ 2002 2003 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2004 { 2005 int ierr,size; 2006 2007 PetscFunctionBegin; 2008 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2009 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2010 if (size > 1) { 2011 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2012 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2013 } else { 2014 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2015 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2023 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2024 { 2025 Mat mat; 2026 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2027 int ierr,len=0; 2028 2029 PetscFunctionBegin; 2030 *newmat = 0; 2031 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2032 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2033 mat->preallocated = PETSC_TRUE; 2034 a = (Mat_MPISBAIJ*)mat->data; 2035 a->bs = oldmat->bs; 2036 a->bs2 = oldmat->bs2; 2037 a->mbs = oldmat->mbs; 2038 a->nbs = oldmat->nbs; 2039 a->Mbs = oldmat->Mbs; 2040 a->Nbs = oldmat->Nbs; 2041 2042 a->rstart = oldmat->rstart; 2043 a->rend = oldmat->rend; 2044 a->cstart = oldmat->cstart; 2045 a->cend = oldmat->cend; 2046 a->size = oldmat->size; 2047 a->rank = oldmat->rank; 2048 a->donotstash = oldmat->donotstash; 2049 a->roworiented = oldmat->roworiented; 2050 a->rowindices = 0; 2051 a->rowvalues = 0; 2052 a->getrowactive = PETSC_FALSE; 2053 a->barray = 0; 2054 a->rstart_bs = oldmat->rstart_bs; 2055 a->rend_bs = oldmat->rend_bs; 2056 a->cstart_bs = oldmat->cstart_bs; 2057 a->cend_bs = oldmat->cend_bs; 2058 2059 /* hash table stuff */ 2060 a->ht = 0; 2061 a->hd = 0; 2062 a->ht_size = 0; 2063 a->ht_flag = oldmat->ht_flag; 2064 a->ht_fact = oldmat->ht_fact; 2065 a->ht_total_ct = 0; 2066 a->ht_insert_ct = 0; 2067 2068 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2069 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2070 a->cowners = a->rowners + a->size + 2; 2071 a->rowners_bs = a->cowners + a->size + 2; 2072 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2073 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2074 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2075 if (oldmat->colmap) { 2076 #if defined (PETSC_USE_CTABLE) 2077 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2078 #else 2079 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2080 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2081 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2082 #endif 2083 } else a->colmap = 0; 2084 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2085 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2086 PetscLogObjectMemory(mat,len*sizeof(int)); 2087 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2088 } else a->garray = 0; 2089 2090 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2091 PetscLogObjectParent(mat,a->lvec); 2092 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2093 2094 PetscLogObjectParent(mat,a->Mvctx); 2095 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2096 PetscLogObjectParent(mat,a->A); 2097 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2098 PetscLogObjectParent(mat,a->B); 2099 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2100 *newmat = mat; 2101 PetscFunctionReturn(0); 2102 } 2103 2104 #include "petscsys.h" 2105 2106 #undef __FUNCT__ 2107 #define __FUNCT__ "MatLoad_MPISBAIJ" 2108 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2109 { 2110 Mat A; 2111 int i,nz,ierr,j,rstart,rend,fd; 2112 PetscScalar *vals,*buf; 2113 MPI_Comm comm = ((PetscObject)viewer)->comm; 2114 MPI_Status status; 2115 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2116 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2117 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2118 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2119 int dcount,kmax,k,nzcount,tmp; 2120 2121 PetscFunctionBegin; 2122 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2123 2124 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2125 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2126 if (!rank) { 2127 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2128 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2129 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2130 if (header[3] < 0) { 2131 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2132 } 2133 } 2134 2135 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2136 M = header[1]; N = header[2]; 2137 2138 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2139 2140 /* 2141 This code adds extra rows to make sure the number of rows is 2142 divisible by the blocksize 2143 */ 2144 Mbs = M/bs; 2145 extra_rows = bs - M + bs*(Mbs); 2146 if (extra_rows == bs) extra_rows = 0; 2147 else Mbs++; 2148 if (extra_rows &&!rank) { 2149 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2150 } 2151 2152 /* determine ownership of all rows */ 2153 mbs = Mbs/size + ((Mbs % size) > rank); 2154 m = mbs*bs; 2155 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2156 browners = rowners + size + 1; 2157 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2158 rowners[0] = 0; 2159 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2160 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2161 rstart = rowners[rank]; 2162 rend = rowners[rank+1]; 2163 2164 /* distribute row lengths to all processors */ 2165 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2166 if (!rank) { 2167 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2168 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2169 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2170 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2171 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2172 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2173 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2174 } else { 2175 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2176 } 2177 2178 if (!rank) { /* procs[0] */ 2179 /* calculate the number of nonzeros on each processor */ 2180 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2181 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2182 for (i=0; i<size; i++) { 2183 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2184 procsnz[i] += rowlengths[j]; 2185 } 2186 } 2187 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2188 2189 /* determine max buffer needed and allocate it */ 2190 maxnz = 0; 2191 for (i=0; i<size; i++) { 2192 maxnz = PetscMax(maxnz,procsnz[i]); 2193 } 2194 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2195 2196 /* read in my part of the matrix column indices */ 2197 nz = procsnz[0]; 2198 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2199 mycols = ibuf; 2200 if (size == 1) nz -= extra_rows; 2201 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2202 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2203 2204 /* read in every ones (except the last) and ship off */ 2205 for (i=1; i<size-1; i++) { 2206 nz = procsnz[i]; 2207 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2208 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2209 } 2210 /* read in the stuff for the last proc */ 2211 if (size != 1) { 2212 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2213 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2214 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2215 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2216 } 2217 ierr = PetscFree(cols);CHKERRQ(ierr); 2218 } else { /* procs[i], i>0 */ 2219 /* determine buffer space needed for message */ 2220 nz = 0; 2221 for (i=0; i<m; i++) { 2222 nz += locrowlens[i]; 2223 } 2224 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2225 mycols = ibuf; 2226 /* receive message of column indices*/ 2227 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2228 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2229 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2230 } 2231 2232 /* loop over local rows, determining number of off diagonal entries */ 2233 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2234 odlens = dlens + (rend-rstart); 2235 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2236 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2237 masked1 = mask + Mbs; 2238 masked2 = masked1 + Mbs; 2239 rowcount = 0; nzcount = 0; 2240 for (i=0; i<mbs; i++) { 2241 dcount = 0; 2242 odcount = 0; 2243 for (j=0; j<bs; j++) { 2244 kmax = locrowlens[rowcount]; 2245 for (k=0; k<kmax; k++) { 2246 tmp = mycols[nzcount++]/bs; /* block col. index */ 2247 if (!mask[tmp]) { 2248 mask[tmp] = 1; 2249 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2250 else masked1[dcount++] = tmp; /* entry in diag portion */ 2251 } 2252 } 2253 rowcount++; 2254 } 2255 2256 dlens[i] = dcount; /* d_nzz[i] */ 2257 odlens[i] = odcount; /* o_nzz[i] */ 2258 2259 /* zero out the mask elements we set */ 2260 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2261 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2262 } 2263 2264 /* create our matrix */ 2265 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2266 ierr = MatSetType(A,type);CHKERRQ(ierr); 2267 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2268 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2269 2270 if (!rank) { 2271 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2272 /* read in my part of the matrix numerical values */ 2273 nz = procsnz[0]; 2274 vals = buf; 2275 mycols = ibuf; 2276 if (size == 1) nz -= extra_rows; 2277 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2278 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2279 2280 /* insert into matrix */ 2281 jj = rstart*bs; 2282 for (i=0; i<m; i++) { 2283 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2284 mycols += locrowlens[i]; 2285 vals += locrowlens[i]; 2286 jj++; 2287 } 2288 2289 /* read in other processors (except the last one) and ship out */ 2290 for (i=1; i<size-1; i++) { 2291 nz = procsnz[i]; 2292 vals = buf; 2293 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2294 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2295 } 2296 /* the last proc */ 2297 if (size != 1){ 2298 nz = procsnz[i] - extra_rows; 2299 vals = buf; 2300 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2301 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2302 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2303 } 2304 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2305 2306 } else { 2307 /* receive numeric values */ 2308 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2309 2310 /* receive message of values*/ 2311 vals = buf; 2312 mycols = ibuf; 2313 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2314 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2315 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2316 2317 /* insert into matrix */ 2318 jj = rstart*bs; 2319 for (i=0; i<m; i++) { 2320 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2321 mycols += locrowlens[i]; 2322 vals += locrowlens[i]; 2323 jj++; 2324 } 2325 } 2326 2327 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2328 ierr = PetscFree(buf);CHKERRQ(ierr); 2329 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2330 ierr = PetscFree(rowners);CHKERRQ(ierr); 2331 ierr = PetscFree(dlens);CHKERRQ(ierr); 2332 ierr = PetscFree(mask);CHKERRQ(ierr); 2333 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2334 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2335 *newmat = A; 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2341 /*@ 2342 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2343 2344 Input Parameters: 2345 . mat - the matrix 2346 . fact - factor 2347 2348 Collective on Mat 2349 2350 Level: advanced 2351 2352 Notes: 2353 This can also be set by the command line option: -mat_use_hash_table fact 2354 2355 .keywords: matrix, hashtable, factor, HT 2356 2357 .seealso: MatSetOption() 2358 @*/ 2359 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2360 { 2361 PetscFunctionBegin; 2362 SETERRQ(1,"Function not yet written for SBAIJ format"); 2363 /* PetscFunctionReturn(0); */ 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2368 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2369 { 2370 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2371 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2372 PetscReal atmp; 2373 PetscReal *work,*svalues,*rvalues; 2374 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2375 int rank,size,*rowners_bs,dest,count,source; 2376 PetscScalar *va; 2377 MatScalar *ba; 2378 MPI_Status stat; 2379 2380 PetscFunctionBegin; 2381 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2382 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2383 2384 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2385 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2386 2387 bs = a->bs; 2388 mbs = a->mbs; 2389 Mbs = a->Mbs; 2390 ba = b->a; 2391 bi = b->i; 2392 bj = b->j; 2393 /* 2394 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2395 PetscSynchronizedFlush(A->comm); 2396 */ 2397 2398 /* find ownerships */ 2399 rowners_bs = a->rowners_bs; 2400 /* 2401 if (!rank){ 2402 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2403 } 2404 */ 2405 2406 /* each proc creates an array to be distributed */ 2407 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2408 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2409 2410 /* row_max for B */ 2411 if (rank != size-1){ 2412 for (i=0; i<mbs; i++) { 2413 ncols = bi[1] - bi[0]; bi++; 2414 brow = bs*i; 2415 for (j=0; j<ncols; j++){ 2416 bcol = bs*(*bj); 2417 for (kcol=0; kcol<bs; kcol++){ 2418 col = bcol + kcol; /* local col index */ 2419 col += rowners_bs[rank+1]; /* global col index */ 2420 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2421 for (krow=0; krow<bs; krow++){ 2422 atmp = PetscAbsScalar(*ba); ba++; 2423 row = brow + krow; /* local row index */ 2424 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2425 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2426 if (work[col] < atmp) work[col] = atmp; 2427 } 2428 } 2429 bj++; 2430 } 2431 } 2432 /* 2433 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2434 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2435 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2436 */ 2437 2438 /* send values to its owners */ 2439 for (dest=rank+1; dest<size; dest++){ 2440 svalues = work + rowners_bs[dest]; 2441 count = rowners_bs[dest+1]-rowners_bs[dest]; 2442 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2443 /* 2444 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2445 PetscSynchronizedFlush(A->comm); 2446 */ 2447 } 2448 } 2449 2450 /* receive values */ 2451 if (rank){ 2452 rvalues = work; 2453 count = rowners_bs[rank+1]-rowners_bs[rank]; 2454 for (source=0; source<rank; source++){ 2455 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2456 /* process values */ 2457 for (i=0; i<count; i++){ 2458 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2459 } 2460 /* 2461 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2462 PetscSynchronizedFlush(A->comm); 2463 */ 2464 } 2465 } 2466 2467 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2468 ierr = PetscFree(work);CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatRelax_MPISBAIJ" 2474 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2475 { 2476 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2477 int ierr,mbs=mat->mbs,bs=mat->bs; 2478 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2479 Vec bb1; 2480 2481 PetscFunctionBegin; 2482 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2483 if (bs > 1) 2484 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2485 2486 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2487 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2488 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2489 its--; 2490 } 2491 2492 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2493 while (its--){ 2494 2495 /* lower triangular part: slvec0b = - B^T*xx */ 2496 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2497 2498 /* copy xx into slvec0a */ 2499 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2500 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2501 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2502 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2503 2504 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2505 2506 /* copy bb into slvec1a */ 2507 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2508 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2509 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2510 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2511 2512 /* set slvec1b = 0 */ 2513 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2514 2515 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2516 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2517 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2518 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2519 2520 /* upper triangular part: bb1 = bb1 - B*x */ 2521 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2522 2523 /* local diagonal sweep */ 2524 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2525 } 2526 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2527 } else { 2528 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2529 } 2530 PetscFunctionReturn(0); 2531 } 2532 2533 #undef __FUNCT__ 2534 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2535 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2536 { 2537 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2538 int ierr; 2539 PetscScalar mone=-1.0; 2540 Vec lvec1,bb1; 2541 2542 PetscFunctionBegin; 2543 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2544 if (mat->bs > 1) 2545 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2546 2547 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2548 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2549 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2550 its--; 2551 } 2552 2553 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2554 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2555 while (its--){ 2556 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2557 2558 /* lower diagonal part: bb1 = bb - B^T*xx */ 2559 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2560 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2561 2562 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2563 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2564 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2565 2566 /* upper diagonal part: bb1 = bb1 - B*x */ 2567 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2568 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2569 2570 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2571 2572 /* diagonal sweep */ 2573 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2574 } 2575 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2576 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2577 } else { 2578 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2579 } 2580 PetscFunctionReturn(0); 2581 } 2582 2583