1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 extern int MatSetUpMultiply_MPISBAIJ(Mat); 8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 112 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 113 new_j = (int*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 #ifndef MatSetValues_SeqBAIJ_B_Private 155 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 188 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 189 new_j = (int*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 198 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 #endif 231 232 #if defined(PETSC_USE_MAT_SINGLE) 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValues_MPISBAIJ" 235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 238 int ierr,i,N = m*n; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 int ierr,i,N = m*n*b->bs2; 262 MatScalar *vsingle; 263 264 PetscFunctionBegin; 265 if (N > b->setvalueslen) { 266 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 267 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 268 b->setvalueslen = N; 269 } 270 vsingle = b->setvaluescopy; 271 for (i=0; i<N; i++) { 272 vsingle[i] = v[i]; 273 } 274 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 281 { 282 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 283 int ierr,i,N = m*n; 284 MatScalar *vsingle; 285 286 PetscFunctionBegin; 287 SETERRQ(1,"Function not yet written for SBAIJ format"); 288 /* PetscFunctionReturn(0); */ 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 294 { 295 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 296 int ierr,i,N = m*n*b->bs2; 297 MatScalar *vsingle; 298 299 PetscFunctionBegin; 300 SETERRQ(1,"Function not yet written for SBAIJ format"); 301 /* PetscFunctionReturn(0); */ 302 } 303 #endif 304 305 /* Only add/insert a(i,j) with i<=j (blocks). 306 Any a(i,j) with i>j input by user is ingored. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 311 { 312 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 313 MatScalar value; 314 PetscTruth roworiented = baij->roworiented; 315 int ierr,i,j,row,col; 316 int rstart_orig=baij->rstart_bs; 317 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 318 int cend_orig=baij->cend_bs,bs=baij->bs; 319 320 /* Some Variables required in the macro */ 321 Mat A = baij->A; 322 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 323 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 324 MatScalar *aa=a->a; 325 326 Mat B = baij->B; 327 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 328 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 329 MatScalar *ba=b->a; 330 331 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 332 int low,high,t,ridx,cidx,bs2=a->bs2; 333 MatScalar *ap,*bap; 334 335 /* for stash */ 336 int n_loc, *in_loc=0; 337 MatScalar *v_loc=0; 338 339 PetscFunctionBegin; 340 341 if(!baij->donotstash){ 342 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 343 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 344 } 345 346 for (i=0; i<m; i++) { 347 if (im[i] < 0) continue; 348 #if defined(PETSC_USE_BOPT_g) 349 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 350 #endif 351 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 352 row = im[i] - rstart_orig; /* local row index */ 353 for (j=0; j<n; j++) { 354 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 355 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 356 col = in[j] - cstart_orig; /* local col index */ 357 brow = row/bs; bcol = col/bs; 358 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 359 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 360 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 361 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 362 } else if (in[j] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 365 #endif 366 else { /* off-diag entry (B) */ 367 if (mat->was_assembled) { 368 if (!baij->colmap) { 369 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 370 } 371 #if defined (PETSC_USE_CTABLE) 372 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 373 col = col - 1; 374 #else 375 col = baij->colmap[in[j]/bs] - 1; 376 #endif 377 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 378 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 379 col = in[j]; 380 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 381 B = baij->B; 382 b = (Mat_SeqBAIJ*)(B)->data; 383 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 384 ba=b->a; 385 } else col += in[j]%bs; 386 } else col = in[j]; 387 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 388 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 389 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 390 } 391 } 392 } else { /* off processor entry */ 393 if (!baij->donotstash) { 394 n_loc = 0; 395 for (j=0; j<n; j++){ 396 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 397 in_loc[n_loc] = in[j]; 398 if (roworiented) { 399 v_loc[n_loc] = v[i*n+j]; 400 } else { 401 v_loc[n_loc] = v[j*m+i]; 402 } 403 n_loc++; 404 } 405 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 406 } 407 } 408 } 409 410 if(!baij->donotstash){ 411 ierr = PetscFree(in_loc);CHKERRQ(ierr); 412 ierr = PetscFree(v_loc);CHKERRQ(ierr); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 420 { 421 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 422 const MatScalar *value; 423 MatScalar *barray=baij->barray; 424 PetscTruth roworiented = baij->roworiented; 425 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 426 int rend=baij->rend,cstart=baij->cstart,stepval; 427 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 428 429 PetscFunctionBegin; 430 if(!barray) { 431 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 432 baij->barray = barray; 433 } 434 435 if (roworiented) { 436 stepval = (n-1)*bs; 437 } else { 438 stepval = (m-1)*bs; 439 } 440 for (i=0; i<m; i++) { 441 if (im[i] < 0) continue; 442 #if defined(PETSC_USE_BOPT_g) 443 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 444 #endif 445 if (im[i] >= rstart && im[i] < rend) { 446 row = im[i] - rstart; 447 for (j=0; j<n; j++) { 448 /* If NumCol = 1 then a copy is not required */ 449 if ((roworiented) && (n == 1)) { 450 barray = (MatScalar*) v + i*bs2; 451 } else if((!roworiented) && (m == 1)) { 452 barray = (MatScalar*) v + j*bs2; 453 } else { /* Here a copy is required */ 454 if (roworiented) { 455 value = v + i*(stepval+bs)*bs + j*bs; 456 } else { 457 value = v + j*(stepval+bs)*bs + i*bs; 458 } 459 for (ii=0; ii<bs; ii++,value+=stepval) { 460 for (jj=0; jj<bs; jj++) { 461 *barray++ = *value++; 462 } 463 } 464 barray -=bs2; 465 } 466 467 if (in[j] >= cstart && in[j] < cend){ 468 col = in[j] - cstart; 469 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 470 } 471 else if (in[j] < 0) continue; 472 #if defined(PETSC_USE_BOPT_g) 473 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 474 #endif 475 else { 476 if (mat->was_assembled) { 477 if (!baij->colmap) { 478 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 479 } 480 481 #if defined(PETSC_USE_BOPT_g) 482 #if defined (PETSC_USE_CTABLE) 483 { int data; 484 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 485 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 486 } 487 #else 488 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 489 #endif 490 #endif 491 #if defined (PETSC_USE_CTABLE) 492 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 493 col = (col - 1)/bs; 494 #else 495 col = (baij->colmap[in[j]] - 1)/bs; 496 #endif 497 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 498 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 499 col = in[j]; 500 } 501 } 502 else col = in[j]; 503 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 504 } 505 } 506 } else { 507 if (!baij->donotstash) { 508 if (roworiented) { 509 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 510 } else { 511 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 512 } 513 } 514 } 515 } 516 PetscFunctionReturn(0); 517 } 518 519 #define HASH_KEY 0.6180339887 520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 526 { 527 PetscFunctionBegin; 528 SETERRQ(1,"Function not yet written for SBAIJ format"); 529 /* PetscFunctionReturn(0); */ 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 535 { 536 PetscFunctionBegin; 537 SETERRQ(1,"Function not yet written for SBAIJ format"); 538 /* PetscFunctionReturn(0); */ 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatGetValues_MPISBAIJ" 543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 544 { 545 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 546 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 547 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 552 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 553 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 554 row = idxm[i] - bsrstart; 555 for (j=0; j<n; j++) { 556 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 557 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 558 if (idxn[j] >= bscstart && idxn[j] < bscend){ 559 col = idxn[j] - bscstart; 560 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 561 } else { 562 if (!baij->colmap) { 563 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 564 } 565 #if defined (PETSC_USE_CTABLE) 566 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 567 data --; 568 #else 569 data = baij->colmap[idxn[j]/bs]-1; 570 #endif 571 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 572 else { 573 col = data + idxn[j]%bs; 574 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575 } 576 } 577 } 578 } else { 579 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatNorm_MPISBAIJ" 587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 588 { 589 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 590 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 591 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 592 int ierr; 593 PetscReal sum[2],*lnorm2; 594 595 PetscFunctionBegin; 596 if (baij->size == 1) { 597 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 598 } else { 599 if (type == NORM_FROBENIUS) { 600 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 601 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 602 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 603 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 605 /* 606 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 607 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 608 */ 609 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 610 /* 611 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 612 PetscSynchronizedFlush(mat->comm); */ 613 614 *norm = sqrt(sum[0] + 2*sum[1]); 615 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 /* 624 Creates the hash table, and sets the table 625 This table is created only once. 626 If new entried need to be added to the matrix 627 then the hash table has to be destroyed and 628 recreated. 629 */ 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 633 { 634 PetscFunctionBegin; 635 SETERRQ(1,"Function not yet written for SBAIJ format"); 636 /* PetscFunctionReturn(0); */ 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 642 { 643 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 644 int ierr,nstash,reallocs; 645 InsertMode addv; 646 647 PetscFunctionBegin; 648 if (baij->donotstash) { 649 PetscFunctionReturn(0); 650 } 651 652 /* make sure all processors are either in INSERTMODE or ADDMODE */ 653 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 654 if (addv == (ADD_VALUES|INSERT_VALUES)) { 655 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 656 } 657 mat->insertmode = addv; /* in case this processor had no cache */ 658 659 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 673 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 674 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 675 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 676 int *row,*col,other_disassembled; 677 PetscTruth r1,r2,r3; 678 MatScalar *val; 679 InsertMode addv = mat->insertmode; 680 681 PetscFunctionBegin; 682 683 if (!baij->donotstash) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 /* 687 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 688 PetscSynchronizedFlush(mat->comm); 689 */ 690 if (!flg) break; 691 692 for (i=0; i<n;) { 693 /* Now identify the consecutive vals belonging to the same row */ 694 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 695 if (j < n) ncols = j-i; 696 else ncols = n-i; 697 /* Now assemble all these values with a single function call */ 698 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 699 i = j; 700 } 701 } 702 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 703 /* Now process the block-stash. Since the values are stashed column-oriented, 704 set the roworiented flag to column oriented, and after MatSetValues() 705 restore the original flags */ 706 r1 = baij->roworiented; 707 r2 = a->roworiented; 708 r3 = b->roworiented; 709 baij->roworiented = PETSC_FALSE; 710 a->roworiented = PETSC_FALSE; 711 b->roworiented = PETSC_FALSE; 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n;) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 719 if (j < n) ncols = j-i; 720 else ncols = n-i; 721 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 722 i = j; 723 } 724 } 725 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 726 baij->roworiented = r1; 727 a->roworiented = r2; 728 b->roworiented = r3; 729 } 730 731 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 741 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 747 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 748 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 749 } 750 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 752 753 #if defined(PETSC_USE_BOPT_g) 754 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 755 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 756 baij->ht_total_ct = 0; 757 baij->ht_insert_ct = 0; 758 } 759 #endif 760 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 761 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 762 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 763 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 764 } 765 766 if (baij->rowvalues) { 767 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 768 baij->rowvalues = 0; 769 } 770 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 779 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 780 PetscTruth isascii,isdraw; 781 PetscViewer sviewer; 782 PetscViewerFormat format; 783 784 PetscFunctionBegin; 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 787 if (isascii) { 788 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 789 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 790 MatInfo info; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 794 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 795 baij->bs,(int)info.memory);CHKERRQ(ierr); 796 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 797 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 798 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 799 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 800 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 801 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } else if (format == PETSC_VIEWER_ASCII_INFO) { 804 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 } 808 809 if (isdraw) { 810 PetscDraw draw; 811 PetscTruth isnull; 812 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 813 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 814 } 815 816 if (size == 1) { 817 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 818 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 819 } else { 820 /* assemble the entire matrix onto first processor. */ 821 Mat A; 822 Mat_SeqSBAIJ *Aloc; 823 Mat_SeqBAIJ *Bloc; 824 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 825 MatScalar *a; 826 827 /* Should this be the same type as mat? */ 828 if (!rank) { 829 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 830 } else { 831 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 832 } 833 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 834 ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 835 PetscLogObjectParent(mat,A); 836 837 /* copy over the A part */ 838 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 839 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 840 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 841 842 for (i=0; i<mbs; i++) { 843 rvals[0] = bs*(baij->rstart + i); 844 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 845 for (j=ai[i]; j<ai[i+1]; j++) { 846 col = (baij->cstart+aj[j])*bs; 847 for (k=0; k<bs; k++) { 848 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 849 col++; a += bs; 850 } 851 } 852 } 853 /* copy over the B part */ 854 Bloc = (Mat_SeqBAIJ*)baij->B->data; 855 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 856 for (i=0; i<mbs; i++) { 857 rvals[0] = bs*(baij->rstart + i); 858 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 859 for (j=ai[i]; j<ai[i+1]; j++) { 860 col = baij->garray[aj[j]]*bs; 861 for (k=0; k<bs; k++) { 862 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 863 col++; a += bs; 864 } 865 } 866 } 867 ierr = PetscFree(rvals);CHKERRQ(ierr); 868 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 869 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 870 /* 871 Everyone has to call to draw the matrix since the graphics waits are 872 synchronized across all processors that share the PetscDraw object 873 */ 874 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 875 if (!rank) { 876 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 877 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 878 } 879 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 880 ierr = MatDestroy(A);CHKERRQ(ierr); 881 } 882 PetscFunctionReturn(0); 883 } 884 885 #undef __FUNCT__ 886 #define __FUNCT__ "MatView_MPISBAIJ" 887 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 888 { 889 int ierr; 890 PetscTruth isascii,isdraw,issocket,isbinary; 891 892 PetscFunctionBegin; 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 894 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 895 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 896 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 897 if (isascii || isdraw || issocket || isbinary) { 898 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 899 } else { 900 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 901 } 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatDestroy_MPISBAIJ" 907 int MatDestroy_MPISBAIJ(Mat mat) 908 { 909 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 910 int ierr; 911 912 PetscFunctionBegin; 913 #if defined(PETSC_USE_LOG) 914 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 915 #endif 916 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 917 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 918 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 919 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 920 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 921 #if defined (PETSC_USE_CTABLE) 922 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 923 #else 924 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 925 #endif 926 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 927 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 928 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 929 if (baij->slvec0) { 930 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 931 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 932 } 933 if (baij->slvec1) { 934 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 935 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 936 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 937 } 938 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 939 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 940 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 941 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 942 #if defined(PETSC_USE_MAT_SINGLE) 943 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 944 #endif 945 ierr = PetscFree(baij);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatMult_MPISBAIJ" 951 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 952 { 953 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 954 int ierr,nt,mbs=a->mbs,bs=a->bs; 955 PetscScalar *x,*from,zero=0.0; 956 957 PetscFunctionBegin; 958 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 959 if (nt != A->n) { 960 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 961 } 962 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 963 if (nt != A->m) { 964 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 965 } 966 967 /* diagonal part */ 968 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 969 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 970 971 /* subdiagonal part */ 972 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 973 974 /* copy x into the vec slvec0 */ 975 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 976 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 977 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 978 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 979 980 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 981 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 982 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 983 984 /* supperdiagonal part */ 985 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 986 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 992 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 993 { 994 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 995 int ierr,nt; 996 997 PetscFunctionBegin; 998 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 999 if (nt != A->n) { 1000 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1001 } 1002 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1003 if (nt != A->m) { 1004 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1005 } 1006 1007 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1008 /* do diagonal part */ 1009 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1010 /* do supperdiagonal part */ 1011 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1012 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1013 /* do subdiagonal part */ 1014 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1015 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1016 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1017 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1023 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1024 { 1025 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1026 int ierr,mbs=a->mbs,bs=a->bs; 1027 PetscScalar *x,*from,zero=0.0; 1028 1029 PetscFunctionBegin; 1030 /* 1031 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1032 PetscSynchronizedFlush(A->comm); 1033 */ 1034 /* diagonal part */ 1035 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1036 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1037 1038 /* subdiagonal part */ 1039 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1040 1041 /* copy x into the vec slvec0 */ 1042 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1043 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1044 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1045 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1046 1047 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1049 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1050 1051 /* supperdiagonal part */ 1052 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1053 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1059 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1060 { 1061 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1062 int ierr; 1063 1064 PetscFunctionBegin; 1065 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1066 /* do diagonal part */ 1067 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1068 /* do supperdiagonal part */ 1069 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1070 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1071 1072 /* do subdiagonal part */ 1073 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1074 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1075 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1076 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1082 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1083 { 1084 int ierr; 1085 1086 PetscFunctionBegin; 1087 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1093 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1094 { 1095 int ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 /* 1103 This only works correctly for square matrices where the subblock A->A is the 1104 diagonal block 1105 */ 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1108 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1109 { 1110 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1111 int ierr; 1112 1113 PetscFunctionBegin; 1114 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1115 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatScale_MPISBAIJ" 1121 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1122 { 1123 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1124 int ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1128 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1134 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1135 { 1136 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1137 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1138 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1139 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1140 int *cmap,*idx_p,cstart = mat->cstart; 1141 1142 PetscFunctionBegin; 1143 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1144 mat->getrowactive = PETSC_TRUE; 1145 1146 if (!mat->rowvalues && (idx || v)) { 1147 /* 1148 allocate enough space to hold information from the longest row. 1149 */ 1150 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1151 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1152 int max = 1,mbs = mat->mbs,tmp; 1153 for (i=0; i<mbs; i++) { 1154 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1155 if (max < tmp) { max = tmp; } 1156 } 1157 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1158 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1159 } 1160 1161 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1162 lrow = row - brstart; /* local row index */ 1163 1164 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1165 if (!v) {pvA = 0; pvB = 0;} 1166 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1167 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1168 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1169 nztot = nzA + nzB; 1170 1171 cmap = mat->garray; 1172 if (v || idx) { 1173 if (nztot) { 1174 /* Sort by increasing column numbers, assuming A and B already sorted */ 1175 int imark = -1; 1176 if (v) { 1177 *v = v_p = mat->rowvalues; 1178 for (i=0; i<nzB; i++) { 1179 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1180 else break; 1181 } 1182 imark = i; 1183 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1184 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1185 } 1186 if (idx) { 1187 *idx = idx_p = mat->rowindices; 1188 if (imark > -1) { 1189 for (i=0; i<imark; i++) { 1190 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1191 } 1192 } else { 1193 for (i=0; i<nzB; i++) { 1194 if (cmap[cworkB[i]/bs] < cstart) 1195 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1196 else break; 1197 } 1198 imark = i; 1199 } 1200 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1201 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1202 } 1203 } else { 1204 if (idx) *idx = 0; 1205 if (v) *v = 0; 1206 } 1207 } 1208 *nz = nztot; 1209 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1210 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1216 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1217 { 1218 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1219 1220 PetscFunctionBegin; 1221 if (baij->getrowactive == PETSC_FALSE) { 1222 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1223 } 1224 baij->getrowactive = PETSC_FALSE; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1230 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1231 { 1232 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1233 1234 PetscFunctionBegin; 1235 *bs = baij->bs; 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1241 int MatZeroEntries_MPISBAIJ(Mat A) 1242 { 1243 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1244 int ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1248 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1254 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1255 { 1256 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1257 Mat A = a->A,B = a->B; 1258 int ierr; 1259 PetscReal isend[5],irecv[5]; 1260 1261 PetscFunctionBegin; 1262 info->block_size = (PetscReal)a->bs; 1263 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1264 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1265 isend[3] = info->memory; isend[4] = info->mallocs; 1266 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1267 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1268 isend[3] += info->memory; isend[4] += info->mallocs; 1269 if (flag == MAT_LOCAL) { 1270 info->nz_used = isend[0]; 1271 info->nz_allocated = isend[1]; 1272 info->nz_unneeded = isend[2]; 1273 info->memory = isend[3]; 1274 info->mallocs = isend[4]; 1275 } else if (flag == MAT_GLOBAL_MAX) { 1276 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1277 info->nz_used = irecv[0]; 1278 info->nz_allocated = irecv[1]; 1279 info->nz_unneeded = irecv[2]; 1280 info->memory = irecv[3]; 1281 info->mallocs = irecv[4]; 1282 } else if (flag == MAT_GLOBAL_SUM) { 1283 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1284 info->nz_used = irecv[0]; 1285 info->nz_allocated = irecv[1]; 1286 info->nz_unneeded = irecv[2]; 1287 info->memory = irecv[3]; 1288 info->mallocs = irecv[4]; 1289 } else { 1290 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1291 } 1292 info->rows_global = (PetscReal)A->M; 1293 info->columns_global = (PetscReal)A->N; 1294 info->rows_local = (PetscReal)A->m; 1295 info->columns_local = (PetscReal)A->N; 1296 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1297 info->fill_ratio_needed = 0; 1298 info->factor_mallocs = 0; 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1304 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1305 { 1306 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1307 int ierr; 1308 1309 PetscFunctionBegin; 1310 switch (op) { 1311 case MAT_NO_NEW_NONZERO_LOCATIONS: 1312 case MAT_YES_NEW_NONZERO_LOCATIONS: 1313 case MAT_COLUMNS_UNSORTED: 1314 case MAT_COLUMNS_SORTED: 1315 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1316 case MAT_KEEP_ZEROED_ROWS: 1317 case MAT_NEW_NONZERO_LOCATION_ERR: 1318 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1319 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1320 break; 1321 case MAT_ROW_ORIENTED: 1322 a->roworiented = PETSC_TRUE; 1323 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1324 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1325 break; 1326 case MAT_ROWS_SORTED: 1327 case MAT_ROWS_UNSORTED: 1328 case MAT_YES_NEW_DIAGONALS: 1329 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1330 break; 1331 case MAT_COLUMN_ORIENTED: 1332 a->roworiented = PETSC_FALSE; 1333 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1334 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1335 break; 1336 case MAT_IGNORE_OFF_PROC_ENTRIES: 1337 a->donotstash = PETSC_TRUE; 1338 break; 1339 case MAT_NO_NEW_DIAGONALS: 1340 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1341 case MAT_USE_HASH_TABLE: 1342 a->ht_flag = PETSC_TRUE; 1343 break; 1344 case MAT_NOT_SYMMETRIC: 1345 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1346 case MAT_HERMITIAN: 1347 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1348 case MAT_SYMMETRIC: 1349 case MAT_STRUCTURALLY_SYMMETRIC: 1350 case MAT_NOT_HERMITIAN: 1351 case MAT_SYMMETRY_ETERNAL: 1352 case MAT_NOT_SYMMETRY_ETERNAL: 1353 break; 1354 default: 1355 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1356 } 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1362 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1363 { 1364 int ierr; 1365 PetscFunctionBegin; 1366 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1372 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1373 { 1374 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1375 Mat a = baij->A,b = baij->B; 1376 int ierr,s1,s2,s3; 1377 1378 PetscFunctionBegin; 1379 if (ll != rr) { 1380 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1381 } 1382 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1383 if (rr) { 1384 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1385 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1386 /* Overlap communication with computation. */ 1387 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1388 /*} if (ll) { */ 1389 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1390 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1391 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1392 /* } */ 1393 /* scale the diagonal block */ 1394 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1395 1396 /* if (rr) { */ 1397 /* Do a scatter end and then right scale the off-diagonal block */ 1398 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1399 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1400 } 1401 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1407 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1408 { 1409 PetscFunctionBegin; 1410 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1415 int MatPrintHelp_MPISBAIJ(Mat A) 1416 { 1417 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1418 MPI_Comm comm = A->comm; 1419 static int called = 0; 1420 int ierr; 1421 1422 PetscFunctionBegin; 1423 if (!a->rank) { 1424 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1425 } 1426 if (called) {PetscFunctionReturn(0);} else called = 1; 1427 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1428 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1434 int MatSetUnfactored_MPISBAIJ(Mat A) 1435 { 1436 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1437 int ierr; 1438 1439 PetscFunctionBegin; 1440 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatEqual_MPISBAIJ" 1448 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1449 { 1450 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1451 Mat a,b,c,d; 1452 PetscTruth flg; 1453 int ierr; 1454 1455 PetscFunctionBegin; 1456 a = matA->A; b = matA->B; 1457 c = matB->A; d = matB->B; 1458 1459 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1460 if (flg == PETSC_TRUE) { 1461 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1462 } 1463 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1469 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1470 { 1471 int ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1480 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1481 { 1482 int i,ierr; 1483 PetscTruth flg; 1484 1485 for (i=0; i<n; i++) { 1486 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1487 if (!flg) { 1488 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1489 } 1490 } 1491 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 1496 /* -------------------------------------------------------------------*/ 1497 static struct _MatOps MatOps_Values = { 1498 MatSetValues_MPISBAIJ, 1499 MatGetRow_MPISBAIJ, 1500 MatRestoreRow_MPISBAIJ, 1501 MatMult_MPISBAIJ, 1502 /* 4*/ MatMultAdd_MPISBAIJ, 1503 MatMultTranspose_MPISBAIJ, 1504 MatMultTransposeAdd_MPISBAIJ, 1505 0, 1506 0, 1507 0, 1508 /*10*/ 0, 1509 0, 1510 0, 1511 MatRelax_MPISBAIJ, 1512 MatTranspose_MPISBAIJ, 1513 /*15*/ MatGetInfo_MPISBAIJ, 1514 MatEqual_MPISBAIJ, 1515 MatGetDiagonal_MPISBAIJ, 1516 MatDiagonalScale_MPISBAIJ, 1517 MatNorm_MPISBAIJ, 1518 /*20*/ MatAssemblyBegin_MPISBAIJ, 1519 MatAssemblyEnd_MPISBAIJ, 1520 0, 1521 MatSetOption_MPISBAIJ, 1522 MatZeroEntries_MPISBAIJ, 1523 /*25*/ MatZeroRows_MPISBAIJ, 1524 0, 1525 0, 1526 0, 1527 0, 1528 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /*35*/ MatDuplicate_MPISBAIJ, 1534 0, 1535 0, 1536 0, 1537 0, 1538 /*40*/ 0, 1539 MatGetSubMatrices_MPISBAIJ, 1540 MatIncreaseOverlap_MPISBAIJ, 1541 MatGetValues_MPISBAIJ, 1542 0, 1543 /*45*/ MatPrintHelp_MPISBAIJ, 1544 MatScale_MPISBAIJ, 1545 0, 1546 0, 1547 0, 1548 /*50*/ MatGetBlockSize_MPISBAIJ, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /*55*/ 0, 1554 0, 1555 MatSetUnfactored_MPISBAIJ, 1556 0, 1557 MatSetValuesBlocked_MPISBAIJ, 1558 /*60*/ 0, 1559 0, 1560 0, 1561 MatGetPetscMaps_Petsc, 1562 0, 1563 /*65*/ 0, 1564 0, 1565 0, 1566 0, 1567 0, 1568 /*70*/ MatGetRowMax_MPISBAIJ, 1569 0, 1570 0, 1571 0, 1572 0, 1573 /*75*/ 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 /*80*/ 0, 1579 0, 1580 0, 1581 0, 1582 /*85*/ MatLoad_MPISBAIJ 1583 }; 1584 1585 1586 EXTERN_C_BEGIN 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1589 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1590 { 1591 PetscFunctionBegin; 1592 *a = ((Mat_MPISBAIJ *)A->data)->A; 1593 *iscopy = PETSC_FALSE; 1594 PetscFunctionReturn(0); 1595 } 1596 EXTERN_C_END 1597 1598 EXTERN_C_BEGIN 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1601 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1602 { 1603 Mat_MPISBAIJ *b; 1604 int ierr,i,mbs,Mbs; 1605 1606 PetscFunctionBegin; 1607 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1608 1609 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1610 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1611 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1612 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1613 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1614 if (d_nnz) { 1615 for (i=0; i<B->m/bs; i++) { 1616 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1617 } 1618 } 1619 if (o_nnz) { 1620 for (i=0; i<B->m/bs; i++) { 1621 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1622 } 1623 } 1624 B->preallocated = PETSC_TRUE; 1625 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1626 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1627 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1628 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1629 1630 b = (Mat_MPISBAIJ*)B->data; 1631 mbs = B->m/bs; 1632 Mbs = B->M/bs; 1633 if (mbs*bs != B->m) { 1634 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1635 } 1636 1637 b->bs = bs; 1638 b->bs2 = bs*bs; 1639 b->mbs = mbs; 1640 b->nbs = mbs; 1641 b->Mbs = Mbs; 1642 b->Nbs = Mbs; 1643 1644 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1645 b->rowners[0] = 0; 1646 for (i=2; i<=b->size; i++) { 1647 b->rowners[i] += b->rowners[i-1]; 1648 } 1649 b->rstart = b->rowners[b->rank]; 1650 b->rend = b->rowners[b->rank+1]; 1651 b->cstart = b->rstart; 1652 b->cend = b->rend; 1653 for (i=0; i<=b->size; i++) { 1654 b->rowners_bs[i] = b->rowners[i]*bs; 1655 } 1656 b->rstart_bs = b-> rstart*bs; 1657 b->rend_bs = b->rend*bs; 1658 1659 b->cstart_bs = b->cstart*bs; 1660 b->cend_bs = b->cend*bs; 1661 1662 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr); 1663 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1664 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1665 PetscLogObjectParent(B,b->A); 1666 1667 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr); 1668 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1669 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1670 PetscLogObjectParent(B,b->B); 1671 1672 /* build cache for off array entries formed */ 1673 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1674 1675 PetscFunctionReturn(0); 1676 } 1677 EXTERN_C_END 1678 1679 /*MC 1680 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1681 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1682 1683 Options Database Keys: 1684 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1685 1686 Level: beginner 1687 1688 .seealso: MatCreateMPISBAIJ 1689 M*/ 1690 1691 EXTERN_C_BEGIN 1692 #undef __FUNCT__ 1693 #define __FUNCT__ "MatCreate_MPISBAIJ" 1694 int MatCreate_MPISBAIJ(Mat B) 1695 { 1696 Mat_MPISBAIJ *b; 1697 int ierr; 1698 PetscTruth flg; 1699 1700 PetscFunctionBegin; 1701 1702 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1703 B->data = (void*)b; 1704 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1705 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1706 1707 B->ops->destroy = MatDestroy_MPISBAIJ; 1708 B->ops->view = MatView_MPISBAIJ; 1709 B->mapping = 0; 1710 B->factor = 0; 1711 B->assembled = PETSC_FALSE; 1712 1713 B->insertmode = NOT_SET_VALUES; 1714 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1715 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1716 1717 /* build local table of row and column ownerships */ 1718 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1719 b->cowners = b->rowners + b->size + 2; 1720 b->rowners_bs = b->cowners + b->size + 2; 1721 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1722 1723 /* build cache for off array entries formed */ 1724 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1725 b->donotstash = PETSC_FALSE; 1726 b->colmap = PETSC_NULL; 1727 b->garray = PETSC_NULL; 1728 b->roworiented = PETSC_TRUE; 1729 1730 #if defined(PETSC_USE_MAT_SINGLE) 1731 /* stuff for MatSetValues_XXX in single precision */ 1732 b->setvalueslen = 0; 1733 b->setvaluescopy = PETSC_NULL; 1734 #endif 1735 1736 /* stuff used in block assembly */ 1737 b->barray = 0; 1738 1739 /* stuff used for matrix vector multiply */ 1740 b->lvec = 0; 1741 b->Mvctx = 0; 1742 b->slvec0 = 0; 1743 b->slvec0b = 0; 1744 b->slvec1 = 0; 1745 b->slvec1a = 0; 1746 b->slvec1b = 0; 1747 b->sMvctx = 0; 1748 1749 /* stuff for MatGetRow() */ 1750 b->rowindices = 0; 1751 b->rowvalues = 0; 1752 b->getrowactive = PETSC_FALSE; 1753 1754 /* hash table stuff */ 1755 b->ht = 0; 1756 b->hd = 0; 1757 b->ht_size = 0; 1758 b->ht_flag = PETSC_FALSE; 1759 b->ht_fact = 0; 1760 b->ht_total_ct = 0; 1761 b->ht_insert_ct = 0; 1762 1763 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1764 if (flg) { 1765 PetscReal fact = 1.39; 1766 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1767 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1768 if (fact <= 1.0) fact = 1.39; 1769 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1770 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1771 } 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1773 "MatStoreValues_MPISBAIJ", 1774 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1776 "MatRetrieveValues_MPISBAIJ", 1777 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1779 "MatGetDiagonalBlock_MPISBAIJ", 1780 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1782 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1783 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1784 B->symmetric = PETSC_TRUE; 1785 B->structurally_symmetric = PETSC_TRUE; 1786 B->symmetric_set = PETSC_TRUE; 1787 B->structurally_symmetric_set = PETSC_TRUE; 1788 PetscFunctionReturn(0); 1789 } 1790 EXTERN_C_END 1791 1792 /*MC 1793 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1794 1795 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1796 and MATMPISBAIJ otherwise. 1797 1798 Options Database Keys: 1799 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1800 1801 Level: beginner 1802 1803 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1804 M*/ 1805 1806 EXTERN_C_BEGIN 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatCreate_SBAIJ" 1809 int MatCreate_SBAIJ(Mat A) { 1810 int ierr,size; 1811 1812 PetscFunctionBegin; 1813 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1814 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1815 if (size == 1) { 1816 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1817 } else { 1818 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1819 } 1820 PetscFunctionReturn(0); 1821 } 1822 EXTERN_C_END 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1826 /*@C 1827 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1828 the user should preallocate the matrix storage by setting the parameters 1829 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1830 performance can be increased by more than a factor of 50. 1831 1832 Collective on Mat 1833 1834 Input Parameters: 1835 + A - the matrix 1836 . bs - size of blockk 1837 . d_nz - number of block nonzeros per block row in diagonal portion of local 1838 submatrix (same for all local rows) 1839 . d_nnz - array containing the number of block nonzeros in the various block rows 1840 in the upper triangular and diagonal part of the in diagonal portion of the local 1841 (possibly different for each block row) or PETSC_NULL. You must leave room 1842 for the diagonal entry even if it is zero. 1843 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1844 submatrix (same for all local rows). 1845 - o_nnz - array containing the number of nonzeros in the various block rows of the 1846 off-diagonal portion of the local submatrix (possibly different for 1847 each block row) or PETSC_NULL. 1848 1849 1850 Options Database Keys: 1851 . -mat_no_unroll - uses code that does not unroll the loops in the 1852 block calculations (much slower) 1853 . -mat_block_size - size of the blocks to use 1854 1855 Notes: 1856 1857 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1858 than it must be used on all processors that share the object for that argument. 1859 1860 Storage Information: 1861 For a square global matrix we define each processor's diagonal portion 1862 to be its local rows and the corresponding columns (a square submatrix); 1863 each processor's off-diagonal portion encompasses the remainder of the 1864 local matrix (a rectangular submatrix). 1865 1866 The user can specify preallocated storage for the diagonal part of 1867 the local submatrix with either d_nz or d_nnz (not both). Set 1868 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1869 memory allocation. Likewise, specify preallocated storage for the 1870 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1871 1872 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1873 the figure below we depict these three local rows and all columns (0-11). 1874 1875 .vb 1876 0 1 2 3 4 5 6 7 8 9 10 11 1877 ------------------- 1878 row 3 | o o o d d d o o o o o o 1879 row 4 | o o o d d d o o o o o o 1880 row 5 | o o o d d d o o o o o o 1881 ------------------- 1882 .ve 1883 1884 Thus, any entries in the d locations are stored in the d (diagonal) 1885 submatrix, and any entries in the o locations are stored in the 1886 o (off-diagonal) submatrix. Note that the d matrix is stored in 1887 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1888 1889 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1890 plus the diagonal part of the d matrix, 1891 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1892 In general, for PDE problems in which most nonzeros are near the diagonal, 1893 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1894 or you will get TERRIBLE performance; see the users' manual chapter on 1895 matrices. 1896 1897 Level: intermediate 1898 1899 .keywords: matrix, block, aij, compressed row, sparse, parallel 1900 1901 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1902 @*/ 1903 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1904 { 1905 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1906 1907 PetscFunctionBegin; 1908 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1909 if (f) { 1910 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatCreateMPISBAIJ" 1917 /*@C 1918 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1919 (block compressed row). For good matrix assembly performance 1920 the user should preallocate the matrix storage by setting the parameters 1921 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1922 performance can be increased by more than a factor of 50. 1923 1924 Collective on MPI_Comm 1925 1926 Input Parameters: 1927 + comm - MPI communicator 1928 . bs - size of blockk 1929 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1930 This value should be the same as the local size used in creating the 1931 y vector for the matrix-vector product y = Ax. 1932 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1933 This value should be the same as the local size used in creating the 1934 x vector for the matrix-vector product y = Ax. 1935 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1936 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1937 . d_nz - number of block nonzeros per block row in diagonal portion of local 1938 submatrix (same for all local rows) 1939 . d_nnz - array containing the number of block nonzeros in the various block rows 1940 in the upper triangular portion of the in diagonal portion of the local 1941 (possibly different for each block block row) or PETSC_NULL. 1942 You must leave room for the diagonal entry even if it is zero. 1943 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1944 submatrix (same for all local rows). 1945 - o_nnz - array containing the number of nonzeros in the various block rows of the 1946 off-diagonal portion of the local submatrix (possibly different for 1947 each block row) or PETSC_NULL. 1948 1949 Output Parameter: 1950 . A - the matrix 1951 1952 Options Database Keys: 1953 . -mat_no_unroll - uses code that does not unroll the loops in the 1954 block calculations (much slower) 1955 . -mat_block_size - size of the blocks to use 1956 . -mat_mpi - use the parallel matrix data structures even on one processor 1957 (defaults to using SeqBAIJ format on one processor) 1958 1959 Notes: 1960 The user MUST specify either the local or global matrix dimensions 1961 (possibly both). 1962 1963 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1964 than it must be used on all processors that share the object for that argument. 1965 1966 Storage Information: 1967 For a square global matrix we define each processor's diagonal portion 1968 to be its local rows and the corresponding columns (a square submatrix); 1969 each processor's off-diagonal portion encompasses the remainder of the 1970 local matrix (a rectangular submatrix). 1971 1972 The user can specify preallocated storage for the diagonal part of 1973 the local submatrix with either d_nz or d_nnz (not both). Set 1974 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1975 memory allocation. Likewise, specify preallocated storage for the 1976 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1977 1978 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1979 the figure below we depict these three local rows and all columns (0-11). 1980 1981 .vb 1982 0 1 2 3 4 5 6 7 8 9 10 11 1983 ------------------- 1984 row 3 | o o o d d d o o o o o o 1985 row 4 | o o o d d d o o o o o o 1986 row 5 | o o o d d d o o o o o o 1987 ------------------- 1988 .ve 1989 1990 Thus, any entries in the d locations are stored in the d (diagonal) 1991 submatrix, and any entries in the o locations are stored in the 1992 o (off-diagonal) submatrix. Note that the d matrix is stored in 1993 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1994 1995 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1996 plus the diagonal part of the d matrix, 1997 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1998 In general, for PDE problems in which most nonzeros are near the diagonal, 1999 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2000 or you will get TERRIBLE performance; see the users' manual chapter on 2001 matrices. 2002 2003 Level: intermediate 2004 2005 .keywords: matrix, block, aij, compressed row, sparse, parallel 2006 2007 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2008 @*/ 2009 2010 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2011 { 2012 int ierr,size; 2013 2014 PetscFunctionBegin; 2015 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2016 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2017 if (size > 1) { 2018 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2019 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2020 } else { 2021 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2022 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2023 } 2024 PetscFunctionReturn(0); 2025 } 2026 2027 2028 #undef __FUNCT__ 2029 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2030 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2031 { 2032 Mat mat; 2033 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2034 int ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs; 2035 PetscScalar *array; 2036 2037 PetscFunctionBegin; 2038 *newmat = 0; 2039 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2040 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2041 2042 mat->factor = matin->factor; 2043 mat->preallocated = PETSC_TRUE; 2044 mat->assembled = PETSC_TRUE; 2045 mat->insertmode = NOT_SET_VALUES; 2046 2047 a = (Mat_MPISBAIJ*)mat->data; 2048 a->bs = oldmat->bs; 2049 a->bs2 = oldmat->bs2; 2050 a->mbs = oldmat->mbs; 2051 a->nbs = oldmat->nbs; 2052 a->Mbs = oldmat->Mbs; 2053 a->Nbs = oldmat->Nbs; 2054 2055 a->rstart = oldmat->rstart; 2056 a->rend = oldmat->rend; 2057 a->cstart = oldmat->cstart; 2058 a->cend = oldmat->cend; 2059 a->size = oldmat->size; 2060 a->rank = oldmat->rank; 2061 a->donotstash = oldmat->donotstash; 2062 a->roworiented = oldmat->roworiented; 2063 a->rowindices = 0; 2064 a->rowvalues = 0; 2065 a->getrowactive = PETSC_FALSE; 2066 a->barray = 0; 2067 a->rstart_bs = oldmat->rstart_bs; 2068 a->rend_bs = oldmat->rend_bs; 2069 a->cstart_bs = oldmat->cstart_bs; 2070 a->cend_bs = oldmat->cend_bs; 2071 2072 /* hash table stuff */ 2073 a->ht = 0; 2074 a->hd = 0; 2075 a->ht_size = 0; 2076 a->ht_flag = oldmat->ht_flag; 2077 a->ht_fact = oldmat->ht_fact; 2078 a->ht_total_ct = 0; 2079 a->ht_insert_ct = 0; 2080 2081 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2082 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2083 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2084 if (oldmat->colmap) { 2085 #if defined (PETSC_USE_CTABLE) 2086 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2087 #else 2088 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2089 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2090 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2091 #endif 2092 } else a->colmap = 0; 2093 2094 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2095 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2096 PetscLogObjectMemory(mat,len*sizeof(int)); 2097 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2098 } else a->garray = 0; 2099 2100 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2101 PetscLogObjectParent(mat,a->lvec); 2102 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2103 PetscLogObjectParent(mat,a->Mvctx); 2104 2105 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2106 PetscLogObjectParent(mat,a->slvec0); 2107 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2108 PetscLogObjectParent(mat,a->slvec1); 2109 2110 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2111 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2112 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2113 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2114 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2115 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2116 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2117 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2118 PetscLogObjectParent(mat,a->slvec0); 2119 PetscLogObjectParent(mat,a->slvec1); 2120 PetscLogObjectParent(mat,a->slvec0b); 2121 PetscLogObjectParent(mat,a->slvec1a); 2122 PetscLogObjectParent(mat,a->slvec1b); 2123 2124 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2125 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2126 a->sMvctx = oldmat->sMvctx; 2127 PetscLogObjectParent(mat,a->sMvctx); 2128 2129 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2130 PetscLogObjectParent(mat,a->A); 2131 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2132 PetscLogObjectParent(mat,a->B); 2133 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2134 *newmat = mat; 2135 PetscFunctionReturn(0); 2136 } 2137 2138 #include "petscsys.h" 2139 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatLoad_MPISBAIJ" 2142 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2143 { 2144 Mat A; 2145 int i,nz,ierr,j,rstart,rend,fd; 2146 PetscScalar *vals,*buf; 2147 MPI_Comm comm = ((PetscObject)viewer)->comm; 2148 MPI_Status status; 2149 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2150 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2151 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2152 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2153 int dcount,kmax,k,nzcount,tmp; 2154 2155 PetscFunctionBegin; 2156 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2157 2158 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2159 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2160 if (!rank) { 2161 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2162 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2163 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2164 if (header[3] < 0) { 2165 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2166 } 2167 } 2168 2169 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2170 M = header[1]; N = header[2]; 2171 2172 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2173 2174 /* 2175 This code adds extra rows to make sure the number of rows is 2176 divisible by the blocksize 2177 */ 2178 Mbs = M/bs; 2179 extra_rows = bs - M + bs*(Mbs); 2180 if (extra_rows == bs) extra_rows = 0; 2181 else Mbs++; 2182 if (extra_rows &&!rank) { 2183 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2184 } 2185 2186 /* determine ownership of all rows */ 2187 mbs = Mbs/size + ((Mbs % size) > rank); 2188 m = mbs*bs; 2189 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2190 browners = rowners + size + 1; 2191 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2192 rowners[0] = 0; 2193 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2194 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2195 rstart = rowners[rank]; 2196 rend = rowners[rank+1]; 2197 2198 /* distribute row lengths to all processors */ 2199 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2200 if (!rank) { 2201 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2202 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2203 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2204 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2205 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2206 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2207 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2208 } else { 2209 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2210 } 2211 2212 if (!rank) { /* procs[0] */ 2213 /* calculate the number of nonzeros on each processor */ 2214 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2215 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2216 for (i=0; i<size; i++) { 2217 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2218 procsnz[i] += rowlengths[j]; 2219 } 2220 } 2221 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2222 2223 /* determine max buffer needed and allocate it */ 2224 maxnz = 0; 2225 for (i=0; i<size; i++) { 2226 maxnz = PetscMax(maxnz,procsnz[i]); 2227 } 2228 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2229 2230 /* read in my part of the matrix column indices */ 2231 nz = procsnz[0]; 2232 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2233 mycols = ibuf; 2234 if (size == 1) nz -= extra_rows; 2235 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2236 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2237 2238 /* read in every ones (except the last) and ship off */ 2239 for (i=1; i<size-1; i++) { 2240 nz = procsnz[i]; 2241 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2242 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2243 } 2244 /* read in the stuff for the last proc */ 2245 if (size != 1) { 2246 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2247 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2248 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2249 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2250 } 2251 ierr = PetscFree(cols);CHKERRQ(ierr); 2252 } else { /* procs[i], i>0 */ 2253 /* determine buffer space needed for message */ 2254 nz = 0; 2255 for (i=0; i<m; i++) { 2256 nz += locrowlens[i]; 2257 } 2258 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2259 mycols = ibuf; 2260 /* receive message of column indices*/ 2261 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2262 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2263 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2264 } 2265 2266 /* loop over local rows, determining number of off diagonal entries */ 2267 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2268 odlens = dlens + (rend-rstart); 2269 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2270 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2271 masked1 = mask + Mbs; 2272 masked2 = masked1 + Mbs; 2273 rowcount = 0; nzcount = 0; 2274 for (i=0; i<mbs; i++) { 2275 dcount = 0; 2276 odcount = 0; 2277 for (j=0; j<bs; j++) { 2278 kmax = locrowlens[rowcount]; 2279 for (k=0; k<kmax; k++) { 2280 tmp = mycols[nzcount++]/bs; /* block col. index */ 2281 if (!mask[tmp]) { 2282 mask[tmp] = 1; 2283 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2284 else masked1[dcount++] = tmp; /* entry in diag portion */ 2285 } 2286 } 2287 rowcount++; 2288 } 2289 2290 dlens[i] = dcount; /* d_nzz[i] */ 2291 odlens[i] = odcount; /* o_nzz[i] */ 2292 2293 /* zero out the mask elements we set */ 2294 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2295 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2296 } 2297 2298 /* create our matrix */ 2299 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2300 ierr = MatSetType(A,type);CHKERRQ(ierr); 2301 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2302 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2303 2304 if (!rank) { 2305 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2306 /* read in my part of the matrix numerical values */ 2307 nz = procsnz[0]; 2308 vals = buf; 2309 mycols = ibuf; 2310 if (size == 1) nz -= extra_rows; 2311 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2312 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2313 2314 /* insert into matrix */ 2315 jj = rstart*bs; 2316 for (i=0; i<m; i++) { 2317 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2318 mycols += locrowlens[i]; 2319 vals += locrowlens[i]; 2320 jj++; 2321 } 2322 2323 /* read in other processors (except the last one) and ship out */ 2324 for (i=1; i<size-1; i++) { 2325 nz = procsnz[i]; 2326 vals = buf; 2327 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2328 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2329 } 2330 /* the last proc */ 2331 if (size != 1){ 2332 nz = procsnz[i] - extra_rows; 2333 vals = buf; 2334 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2335 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2336 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2337 } 2338 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2339 2340 } else { 2341 /* receive numeric values */ 2342 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2343 2344 /* receive message of values*/ 2345 vals = buf; 2346 mycols = ibuf; 2347 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2348 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2349 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2350 2351 /* insert into matrix */ 2352 jj = rstart*bs; 2353 for (i=0; i<m; i++) { 2354 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2355 mycols += locrowlens[i]; 2356 vals += locrowlens[i]; 2357 jj++; 2358 } 2359 } 2360 2361 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2362 ierr = PetscFree(buf);CHKERRQ(ierr); 2363 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2364 ierr = PetscFree(rowners);CHKERRQ(ierr); 2365 ierr = PetscFree(dlens);CHKERRQ(ierr); 2366 ierr = PetscFree(mask);CHKERRQ(ierr); 2367 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 *newmat = A; 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2375 /*@ 2376 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2377 2378 Input Parameters: 2379 . mat - the matrix 2380 . fact - factor 2381 2382 Collective on Mat 2383 2384 Level: advanced 2385 2386 Notes: 2387 This can also be set by the command line option: -mat_use_hash_table fact 2388 2389 .keywords: matrix, hashtable, factor, HT 2390 2391 .seealso: MatSetOption() 2392 @*/ 2393 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2394 { 2395 PetscFunctionBegin; 2396 SETERRQ(1,"Function not yet written for SBAIJ format"); 2397 /* PetscFunctionReturn(0); */ 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2402 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2403 { 2404 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2405 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2406 PetscReal atmp; 2407 PetscReal *work,*svalues,*rvalues; 2408 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2409 int rank,size,*rowners_bs,dest,count,source; 2410 PetscScalar *va; 2411 MatScalar *ba; 2412 MPI_Status stat; 2413 2414 PetscFunctionBegin; 2415 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2416 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2417 2418 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2419 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2420 2421 bs = a->bs; 2422 mbs = a->mbs; 2423 Mbs = a->Mbs; 2424 ba = b->a; 2425 bi = b->i; 2426 bj = b->j; 2427 /* 2428 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2429 PetscSynchronizedFlush(A->comm); 2430 */ 2431 2432 /* find ownerships */ 2433 rowners_bs = a->rowners_bs; 2434 /* 2435 if (!rank){ 2436 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2437 } 2438 */ 2439 2440 /* each proc creates an array to be distributed */ 2441 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2442 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2443 2444 /* row_max for B */ 2445 if (rank != size-1){ 2446 for (i=0; i<mbs; i++) { 2447 ncols = bi[1] - bi[0]; bi++; 2448 brow = bs*i; 2449 for (j=0; j<ncols; j++){ 2450 bcol = bs*(*bj); 2451 for (kcol=0; kcol<bs; kcol++){ 2452 col = bcol + kcol; /* local col index */ 2453 col += rowners_bs[rank+1]; /* global col index */ 2454 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2455 for (krow=0; krow<bs; krow++){ 2456 atmp = PetscAbsScalar(*ba); ba++; 2457 row = brow + krow; /* local row index */ 2458 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2459 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2460 if (work[col] < atmp) work[col] = atmp; 2461 } 2462 } 2463 bj++; 2464 } 2465 } 2466 /* 2467 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2468 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2469 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2470 */ 2471 2472 /* send values to its owners */ 2473 for (dest=rank+1; dest<size; dest++){ 2474 svalues = work + rowners_bs[dest]; 2475 count = rowners_bs[dest+1]-rowners_bs[dest]; 2476 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2477 /* 2478 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2479 PetscSynchronizedFlush(A->comm); 2480 */ 2481 } 2482 } 2483 2484 /* receive values */ 2485 if (rank){ 2486 rvalues = work; 2487 count = rowners_bs[rank+1]-rowners_bs[rank]; 2488 for (source=0; source<rank; source++){ 2489 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2490 /* process values */ 2491 for (i=0; i<count; i++){ 2492 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2493 } 2494 /* 2495 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2496 PetscSynchronizedFlush(A->comm); 2497 */ 2498 } 2499 } 2500 2501 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2502 ierr = PetscFree(work);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatRelax_MPISBAIJ" 2508 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2509 { 2510 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2511 int ierr,mbs=mat->mbs,bs=mat->bs; 2512 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2513 Vec bb1; 2514 2515 PetscFunctionBegin; 2516 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2517 if (bs > 1) 2518 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2519 2520 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2521 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2522 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2523 its--; 2524 } 2525 2526 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2527 while (its--){ 2528 2529 /* lower triangular part: slvec0b = - B^T*xx */ 2530 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2531 2532 /* copy xx into slvec0a */ 2533 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2534 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2535 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2536 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2537 2538 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2539 2540 /* copy bb into slvec1a */ 2541 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2542 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2543 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2544 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2545 2546 /* set slvec1b = 0 */ 2547 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2548 2549 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2552 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2553 2554 /* upper triangular part: bb1 = bb1 - B*x */ 2555 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2556 2557 /* local diagonal sweep */ 2558 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2559 } 2560 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2561 } else { 2562 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2563 } 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2569 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2570 { 2571 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2572 int ierr; 2573 PetscScalar mone=-1.0; 2574 Vec lvec1,bb1; 2575 2576 PetscFunctionBegin; 2577 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2578 if (mat->bs > 1) 2579 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2580 2581 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2582 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2583 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2584 its--; 2585 } 2586 2587 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2588 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2589 while (its--){ 2590 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2591 2592 /* lower diagonal part: bb1 = bb - B^T*xx */ 2593 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2594 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2595 2596 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2597 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2598 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2599 2600 /* upper diagonal part: bb1 = bb1 - B*x */ 2601 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2602 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2603 2604 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2605 2606 /* diagonal sweep */ 2607 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2608 } 2609 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2610 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2611 } else { 2612 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2613 } 2614 PetscFunctionReturn(0); 2615 } 2616 2617