1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 extern int MatSetUpMultiply_MPISBAIJ(Mat); 8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 112 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 113 new_j = (int*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 #ifndef MatSetValues_SeqBAIJ_B_Private 155 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 188 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 189 new_j = (int*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 198 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 #endif 231 232 #if defined(PETSC_USE_MAT_SINGLE) 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValues_MPISBAIJ" 235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 238 int ierr,i,N = m*n; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 int ierr,i,N = m*n*b->bs2; 262 MatScalar *vsingle; 263 264 PetscFunctionBegin; 265 if (N > b->setvalueslen) { 266 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 267 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 268 b->setvalueslen = N; 269 } 270 vsingle = b->setvaluescopy; 271 for (i=0; i<N; i++) { 272 vsingle[i] = v[i]; 273 } 274 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 281 { 282 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 283 int ierr,i,N = m*n; 284 MatScalar *vsingle; 285 286 PetscFunctionBegin; 287 SETERRQ(1,"Function not yet written for SBAIJ format"); 288 /* PetscFunctionReturn(0); */ 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 294 { 295 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 296 int ierr,i,N = m*n*b->bs2; 297 MatScalar *vsingle; 298 299 PetscFunctionBegin; 300 SETERRQ(1,"Function not yet written for SBAIJ format"); 301 /* PetscFunctionReturn(0); */ 302 } 303 #endif 304 305 /* Only add/insert a(i,j) with i<=j (blocks). 306 Any a(i,j) with i>j input by user is ingored. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 311 { 312 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 313 MatScalar value; 314 PetscTruth roworiented = baij->roworiented; 315 int ierr,i,j,row,col; 316 int rstart_orig=baij->rstart_bs; 317 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 318 int cend_orig=baij->cend_bs,bs=baij->bs; 319 320 /* Some Variables required in the macro */ 321 Mat A = baij->A; 322 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 323 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 324 MatScalar *aa=a->a; 325 326 Mat B = baij->B; 327 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 328 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 329 MatScalar *ba=b->a; 330 331 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 332 int low,high,t,ridx,cidx,bs2=a->bs2; 333 MatScalar *ap,*bap; 334 335 /* for stash */ 336 int n_loc, *in_loc=0; 337 MatScalar *v_loc=0; 338 339 PetscFunctionBegin; 340 341 if(!baij->donotstash){ 342 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 343 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 344 } 345 346 for (i=0; i<m; i++) { 347 if (im[i] < 0) continue; 348 #if defined(PETSC_USE_BOPT_g) 349 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 350 #endif 351 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 352 row = im[i] - rstart_orig; /* local row index */ 353 for (j=0; j<n; j++) { 354 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 355 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 356 col = in[j] - cstart_orig; /* local col index */ 357 brow = row/bs; bcol = col/bs; 358 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 359 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 360 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 361 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 362 } else if (in[j] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 365 #endif 366 else { /* off-diag entry (B) */ 367 if (mat->was_assembled) { 368 if (!baij->colmap) { 369 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 370 } 371 #if defined (PETSC_USE_CTABLE) 372 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 373 col = col - 1; 374 #else 375 col = baij->colmap[in[j]/bs] - 1; 376 #endif 377 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 378 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 379 col = in[j]; 380 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 381 B = baij->B; 382 b = (Mat_SeqBAIJ*)(B)->data; 383 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 384 ba=b->a; 385 } else col += in[j]%bs; 386 } else col = in[j]; 387 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 388 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 389 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 390 } 391 } 392 } else { /* off processor entry */ 393 if (!baij->donotstash) { 394 n_loc = 0; 395 for (j=0; j<n; j++){ 396 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 397 in_loc[n_loc] = in[j]; 398 if (roworiented) { 399 v_loc[n_loc] = v[i*n+j]; 400 } else { 401 v_loc[n_loc] = v[j*m+i]; 402 } 403 n_loc++; 404 } 405 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 406 } 407 } 408 } 409 410 if(!baij->donotstash){ 411 ierr = PetscFree(in_loc);CHKERRQ(ierr); 412 ierr = PetscFree(v_loc);CHKERRQ(ierr); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 420 { 421 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 422 const MatScalar *value; 423 MatScalar *barray=baij->barray; 424 PetscTruth roworiented = baij->roworiented; 425 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 426 int rend=baij->rend,cstart=baij->cstart,stepval; 427 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 428 429 PetscFunctionBegin; 430 if(!barray) { 431 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 432 baij->barray = barray; 433 } 434 435 if (roworiented) { 436 stepval = (n-1)*bs; 437 } else { 438 stepval = (m-1)*bs; 439 } 440 for (i=0; i<m; i++) { 441 if (im[i] < 0) continue; 442 #if defined(PETSC_USE_BOPT_g) 443 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 444 #endif 445 if (im[i] >= rstart && im[i] < rend) { 446 row = im[i] - rstart; 447 for (j=0; j<n; j++) { 448 /* If NumCol = 1 then a copy is not required */ 449 if ((roworiented) && (n == 1)) { 450 barray = (MatScalar*) v + i*bs2; 451 } else if((!roworiented) && (m == 1)) { 452 barray = (MatScalar*) v + j*bs2; 453 } else { /* Here a copy is required */ 454 if (roworiented) { 455 value = v + i*(stepval+bs)*bs + j*bs; 456 } else { 457 value = v + j*(stepval+bs)*bs + i*bs; 458 } 459 for (ii=0; ii<bs; ii++,value+=stepval) { 460 for (jj=0; jj<bs; jj++) { 461 *barray++ = *value++; 462 } 463 } 464 barray -=bs2; 465 } 466 467 if (in[j] >= cstart && in[j] < cend){ 468 col = in[j] - cstart; 469 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 470 } 471 else if (in[j] < 0) continue; 472 #if defined(PETSC_USE_BOPT_g) 473 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 474 #endif 475 else { 476 if (mat->was_assembled) { 477 if (!baij->colmap) { 478 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 479 } 480 481 #if defined(PETSC_USE_BOPT_g) 482 #if defined (PETSC_USE_CTABLE) 483 { int data; 484 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 485 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 486 } 487 #else 488 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 489 #endif 490 #endif 491 #if defined (PETSC_USE_CTABLE) 492 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 493 col = (col - 1)/bs; 494 #else 495 col = (baij->colmap[in[j]] - 1)/bs; 496 #endif 497 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 498 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 499 col = in[j]; 500 } 501 } 502 else col = in[j]; 503 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 504 } 505 } 506 } else { 507 if (!baij->donotstash) { 508 if (roworiented) { 509 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 510 } else { 511 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 512 } 513 } 514 } 515 } 516 PetscFunctionReturn(0); 517 } 518 519 #define HASH_KEY 0.6180339887 520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 526 { 527 PetscFunctionBegin; 528 SETERRQ(1,"Function not yet written for SBAIJ format"); 529 /* PetscFunctionReturn(0); */ 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 535 { 536 PetscFunctionBegin; 537 SETERRQ(1,"Function not yet written for SBAIJ format"); 538 /* PetscFunctionReturn(0); */ 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatGetValues_MPISBAIJ" 543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 544 { 545 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 546 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 547 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 552 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 553 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 554 row = idxm[i] - bsrstart; 555 for (j=0; j<n; j++) { 556 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 557 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 558 if (idxn[j] >= bscstart && idxn[j] < bscend){ 559 col = idxn[j] - bscstart; 560 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 561 } else { 562 if (!baij->colmap) { 563 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 564 } 565 #if defined (PETSC_USE_CTABLE) 566 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 567 data --; 568 #else 569 data = baij->colmap[idxn[j]/bs]-1; 570 #endif 571 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 572 else { 573 col = data + idxn[j]%bs; 574 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575 } 576 } 577 } 578 } else { 579 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatNorm_MPISBAIJ" 587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 588 { 589 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 590 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 591 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 592 int ierr; 593 PetscReal sum[2],*lnorm2; 594 595 PetscFunctionBegin; 596 if (baij->size == 1) { 597 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 598 } else { 599 if (type == NORM_FROBENIUS) { 600 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 601 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 602 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 603 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 605 /* 606 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 607 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 608 */ 609 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 610 /* 611 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 612 PetscSynchronizedFlush(mat->comm); */ 613 614 *norm = sqrt(sum[0] + 2*sum[1]); 615 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 /* 624 Creates the hash table, and sets the table 625 This table is created only once. 626 If new entried need to be added to the matrix 627 then the hash table has to be destroyed and 628 recreated. 629 */ 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 633 { 634 PetscFunctionBegin; 635 SETERRQ(1,"Function not yet written for SBAIJ format"); 636 /* PetscFunctionReturn(0); */ 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 642 { 643 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 644 int ierr,nstash,reallocs; 645 InsertMode addv; 646 647 PetscFunctionBegin; 648 if (baij->donotstash) { 649 PetscFunctionReturn(0); 650 } 651 652 /* make sure all processors are either in INSERTMODE or ADDMODE */ 653 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 654 if (addv == (ADD_VALUES|INSERT_VALUES)) { 655 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 656 } 657 mat->insertmode = addv; /* in case this processor had no cache */ 658 659 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 673 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 674 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 675 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 676 int *row,*col,other_disassembled; 677 PetscTruth r1,r2,r3; 678 MatScalar *val; 679 InsertMode addv = mat->insertmode; 680 681 PetscFunctionBegin; 682 683 if (!baij->donotstash) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 /* 687 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 688 PetscSynchronizedFlush(mat->comm); 689 */ 690 if (!flg) break; 691 692 for (i=0; i<n;) { 693 /* Now identify the consecutive vals belonging to the same row */ 694 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 695 if (j < n) ncols = j-i; 696 else ncols = n-i; 697 /* Now assemble all these values with a single function call */ 698 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 699 i = j; 700 } 701 } 702 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 703 /* Now process the block-stash. Since the values are stashed column-oriented, 704 set the roworiented flag to column oriented, and after MatSetValues() 705 restore the original flags */ 706 r1 = baij->roworiented; 707 r2 = a->roworiented; 708 r3 = b->roworiented; 709 baij->roworiented = PETSC_FALSE; 710 a->roworiented = PETSC_FALSE; 711 b->roworiented = PETSC_FALSE; 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n;) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 719 if (j < n) ncols = j-i; 720 else ncols = n-i; 721 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 722 i = j; 723 } 724 } 725 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 726 baij->roworiented = r1; 727 a->roworiented = r2; 728 b->roworiented = r3; 729 } 730 731 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 741 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 747 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 748 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 749 } 750 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 752 753 #if defined(PETSC_USE_BOPT_g) 754 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 755 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 756 baij->ht_total_ct = 0; 757 baij->ht_insert_ct = 0; 758 } 759 #endif 760 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 761 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 762 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 763 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 764 } 765 766 if (baij->rowvalues) { 767 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 768 baij->rowvalues = 0; 769 } 770 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 779 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 780 PetscTruth isascii,isdraw; 781 PetscViewer sviewer; 782 PetscViewerFormat format; 783 784 PetscFunctionBegin; 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 787 if (isascii) { 788 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 789 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 790 MatInfo info; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 794 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 795 baij->bs,(int)info.memory);CHKERRQ(ierr); 796 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 797 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 798 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 799 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 800 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 801 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } else if (format == PETSC_VIEWER_ASCII_INFO) { 804 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 } 808 809 if (isdraw) { 810 PetscDraw draw; 811 PetscTruth isnull; 812 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 813 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 814 } 815 816 if (size == 1) { 817 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 818 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 819 } else { 820 /* assemble the entire matrix onto first processor. */ 821 Mat A; 822 Mat_SeqSBAIJ *Aloc; 823 Mat_SeqBAIJ *Bloc; 824 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 825 MatScalar *a; 826 827 if (!rank) { 828 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 829 } else { 830 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 831 } 832 PetscLogObjectParent(mat,A); 833 834 /* copy over the A part */ 835 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 836 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 837 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 838 839 for (i=0; i<mbs; i++) { 840 rvals[0] = bs*(baij->rstart + i); 841 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 842 for (j=ai[i]; j<ai[i+1]; j++) { 843 col = (baij->cstart+aj[j])*bs; 844 for (k=0; k<bs; k++) { 845 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 846 col++; a += bs; 847 } 848 } 849 } 850 /* copy over the B part */ 851 Bloc = (Mat_SeqBAIJ*)baij->B->data; 852 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 853 for (i=0; i<mbs; i++) { 854 rvals[0] = bs*(baij->rstart + i); 855 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 856 for (j=ai[i]; j<ai[i+1]; j++) { 857 col = baij->garray[aj[j]]*bs; 858 for (k=0; k<bs; k++) { 859 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 860 col++; a += bs; 861 } 862 } 863 } 864 ierr = PetscFree(rvals);CHKERRQ(ierr); 865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 867 /* 868 Everyone has to call to draw the matrix since the graphics waits are 869 synchronized across all processors that share the PetscDraw object 870 */ 871 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 872 if (!rank) { 873 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 874 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 875 } 876 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 877 ierr = MatDestroy(A);CHKERRQ(ierr); 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatView_MPISBAIJ" 884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 885 { 886 int ierr; 887 PetscTruth isascii,isdraw,issocket,isbinary; 888 889 PetscFunctionBegin; 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 894 if (isascii || isdraw || issocket || isbinary) { 895 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 896 } else { 897 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatDestroy_MPISBAIJ" 904 int MatDestroy_MPISBAIJ(Mat mat) 905 { 906 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 907 int ierr; 908 909 PetscFunctionBegin; 910 #if defined(PETSC_USE_LOG) 911 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 912 #endif 913 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 914 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 915 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 916 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 917 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 918 #if defined (PETSC_USE_CTABLE) 919 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 920 #else 921 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 922 #endif 923 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 924 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 925 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 926 if (baij->slvec0) { 927 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 928 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 929 } 930 if (baij->slvec1) { 931 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 932 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 933 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 934 } 935 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 936 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 937 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 938 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 939 #if defined(PETSC_USE_MAT_SINGLE) 940 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 941 #endif 942 ierr = PetscFree(baij);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatMult_MPISBAIJ" 948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 949 { 950 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 951 int ierr,nt,mbs=a->mbs,bs=a->bs; 952 PetscScalar *x,*from,zero=0.0; 953 954 PetscFunctionBegin; 955 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 956 if (nt != A->n) { 957 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 958 } 959 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 960 if (nt != A->m) { 961 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 962 } 963 964 /* diagonal part */ 965 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 966 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 967 968 /* subdiagonal part */ 969 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 970 971 /* copy x into the vec slvec0 */ 972 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 973 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 974 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 975 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 976 977 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 978 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 979 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 980 981 /* supperdiagonal part */ 982 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 983 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 989 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 990 { 991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 992 int ierr,nt; 993 994 PetscFunctionBegin; 995 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 996 if (nt != A->n) { 997 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 998 } 999 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1000 if (nt != A->m) { 1001 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1002 } 1003 1004 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1005 /* do diagonal part */ 1006 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* do supperdiagonal part */ 1008 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1009 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1010 /* do subdiagonal part */ 1011 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1012 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1013 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1014 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1020 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1021 { 1022 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1023 int ierr,mbs=a->mbs,bs=a->bs; 1024 PetscScalar *x,*from,zero=0.0; 1025 1026 PetscFunctionBegin; 1027 /* 1028 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1029 PetscSynchronizedFlush(A->comm); 1030 */ 1031 /* diagonal part */ 1032 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1033 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1034 1035 /* subdiagonal part */ 1036 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1037 1038 /* copy x into the vec slvec0 */ 1039 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1040 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1041 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1042 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1043 1044 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1045 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1046 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1047 1048 /* supperdiagonal part */ 1049 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1050 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1056 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1057 { 1058 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1059 int ierr; 1060 1061 PetscFunctionBegin; 1062 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1063 /* do diagonal part */ 1064 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1065 /* do supperdiagonal part */ 1066 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1067 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1068 1069 /* do subdiagonal part */ 1070 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1071 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1072 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1079 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1080 { 1081 int ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1090 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1091 { 1092 int ierr; 1093 1094 PetscFunctionBegin; 1095 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* 1100 This only works correctly for square matrices where the subblock A->A is the 1101 diagonal block 1102 */ 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1106 { 1107 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1108 int ierr; 1109 1110 PetscFunctionBegin; 1111 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1112 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatScale_MPISBAIJ" 1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1119 { 1120 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1121 int ierr; 1122 1123 PetscFunctionBegin; 1124 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1125 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1132 { 1133 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1134 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1135 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1136 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1137 int *cmap,*idx_p,cstart = mat->cstart; 1138 1139 PetscFunctionBegin; 1140 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1141 mat->getrowactive = PETSC_TRUE; 1142 1143 if (!mat->rowvalues && (idx || v)) { 1144 /* 1145 allocate enough space to hold information from the longest row. 1146 */ 1147 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1148 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1149 int max = 1,mbs = mat->mbs,tmp; 1150 for (i=0; i<mbs; i++) { 1151 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1152 if (max < tmp) { max = tmp; } 1153 } 1154 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1155 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1156 } 1157 1158 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1159 lrow = row - brstart; /* local row index */ 1160 1161 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1162 if (!v) {pvA = 0; pvB = 0;} 1163 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1164 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1165 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1166 nztot = nzA + nzB; 1167 1168 cmap = mat->garray; 1169 if (v || idx) { 1170 if (nztot) { 1171 /* Sort by increasing column numbers, assuming A and B already sorted */ 1172 int imark = -1; 1173 if (v) { 1174 *v = v_p = mat->rowvalues; 1175 for (i=0; i<nzB; i++) { 1176 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1177 else break; 1178 } 1179 imark = i; 1180 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1181 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1182 } 1183 if (idx) { 1184 *idx = idx_p = mat->rowindices; 1185 if (imark > -1) { 1186 for (i=0; i<imark; i++) { 1187 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1188 } 1189 } else { 1190 for (i=0; i<nzB; i++) { 1191 if (cmap[cworkB[i]/bs] < cstart) 1192 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1193 else break; 1194 } 1195 imark = i; 1196 } 1197 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1198 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1199 } 1200 } else { 1201 if (idx) *idx = 0; 1202 if (v) *v = 0; 1203 } 1204 } 1205 *nz = nztot; 1206 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1207 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1214 { 1215 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1216 1217 PetscFunctionBegin; 1218 if (baij->getrowactive == PETSC_FALSE) { 1219 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1220 } 1221 baij->getrowactive = PETSC_FALSE; 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1228 { 1229 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1230 1231 PetscFunctionBegin; 1232 *bs = baij->bs; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1238 int MatZeroEntries_MPISBAIJ(Mat A) 1239 { 1240 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1245 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1252 { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1254 Mat A = a->A,B = a->B; 1255 int ierr; 1256 PetscReal isend[5],irecv[5]; 1257 1258 PetscFunctionBegin; 1259 info->block_size = (PetscReal)a->bs; 1260 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1261 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1262 isend[3] = info->memory; isend[4] = info->mallocs; 1263 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1264 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1265 isend[3] += info->memory; isend[4] += info->mallocs; 1266 if (flag == MAT_LOCAL) { 1267 info->nz_used = isend[0]; 1268 info->nz_allocated = isend[1]; 1269 info->nz_unneeded = isend[2]; 1270 info->memory = isend[3]; 1271 info->mallocs = isend[4]; 1272 } else if (flag == MAT_GLOBAL_MAX) { 1273 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1274 info->nz_used = irecv[0]; 1275 info->nz_allocated = irecv[1]; 1276 info->nz_unneeded = irecv[2]; 1277 info->memory = irecv[3]; 1278 info->mallocs = irecv[4]; 1279 } else if (flag == MAT_GLOBAL_SUM) { 1280 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1281 info->nz_used = irecv[0]; 1282 info->nz_allocated = irecv[1]; 1283 info->nz_unneeded = irecv[2]; 1284 info->memory = irecv[3]; 1285 info->mallocs = irecv[4]; 1286 } else { 1287 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1288 } 1289 info->rows_global = (PetscReal)A->M; 1290 info->columns_global = (PetscReal)A->N; 1291 info->rows_local = (PetscReal)A->m; 1292 info->columns_local = (PetscReal)A->N; 1293 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1294 info->fill_ratio_needed = 0; 1295 info->factor_mallocs = 0; 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1302 { 1303 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1304 int ierr; 1305 1306 PetscFunctionBegin; 1307 switch (op) { 1308 case MAT_NO_NEW_NONZERO_LOCATIONS: 1309 case MAT_YES_NEW_NONZERO_LOCATIONS: 1310 case MAT_COLUMNS_UNSORTED: 1311 case MAT_COLUMNS_SORTED: 1312 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1313 case MAT_KEEP_ZEROED_ROWS: 1314 case MAT_NEW_NONZERO_LOCATION_ERR: 1315 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1316 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1317 break; 1318 case MAT_ROW_ORIENTED: 1319 a->roworiented = PETSC_TRUE; 1320 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1321 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1322 break; 1323 case MAT_ROWS_SORTED: 1324 case MAT_ROWS_UNSORTED: 1325 case MAT_YES_NEW_DIAGONALS: 1326 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1327 break; 1328 case MAT_COLUMN_ORIENTED: 1329 a->roworiented = PETSC_FALSE; 1330 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1331 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1332 break; 1333 case MAT_IGNORE_OFF_PROC_ENTRIES: 1334 a->donotstash = PETSC_TRUE; 1335 break; 1336 case MAT_NO_NEW_DIAGONALS: 1337 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1338 case MAT_USE_HASH_TABLE: 1339 a->ht_flag = PETSC_TRUE; 1340 break; 1341 case MAT_NOT_SYMMETRIC: 1342 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1343 case MAT_HERMITIAN: 1344 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1345 case MAT_SYMMETRIC: 1346 case MAT_STRUCTURALLY_SYMMETRIC: 1347 case MAT_NOT_HERMITIAN: 1348 case MAT_SYMMETRY_ETERNAL: 1349 case MAT_NOT_SYMMETRY_ETERNAL: 1350 break; 1351 default: 1352 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1360 { 1361 int ierr; 1362 PetscFunctionBegin; 1363 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1370 { 1371 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1372 Mat a = baij->A,b = baij->B; 1373 int ierr,s1,s2,s3; 1374 1375 PetscFunctionBegin; 1376 if (ll != rr) { 1377 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1378 } 1379 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1380 if (rr) { 1381 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1382 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1383 /* Overlap communication with computation. */ 1384 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1385 /*} if (ll) { */ 1386 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1387 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1388 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1389 /* } */ 1390 /* scale the diagonal block */ 1391 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1392 1393 /* if (rr) { */ 1394 /* Do a scatter end and then right scale the off-diagonal block */ 1395 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1396 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1397 } 1398 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1405 { 1406 PetscFunctionBegin; 1407 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1412 int MatPrintHelp_MPISBAIJ(Mat A) 1413 { 1414 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1415 MPI_Comm comm = A->comm; 1416 static int called = 0; 1417 int ierr; 1418 1419 PetscFunctionBegin; 1420 if (!a->rank) { 1421 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1422 } 1423 if (called) {PetscFunctionReturn(0);} else called = 1; 1424 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1425 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1431 int MatSetUnfactored_MPISBAIJ(Mat A) 1432 { 1433 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatEqual_MPISBAIJ" 1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1446 { 1447 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1448 Mat a,b,c,d; 1449 PetscTruth flg; 1450 int ierr; 1451 1452 PetscFunctionBegin; 1453 a = matA->A; b = matA->B; 1454 c = matB->A; d = matB->B; 1455 1456 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1457 if (flg == PETSC_TRUE) { 1458 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1459 } 1460 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1466 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1467 { 1468 int ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1477 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1478 { 1479 int i,ierr; 1480 PetscTruth flg; 1481 1482 for (i=0; i<n; i++) { 1483 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1484 if (!flg) { 1485 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1486 } 1487 } 1488 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 1493 /* -------------------------------------------------------------------*/ 1494 static struct _MatOps MatOps_Values = { 1495 MatSetValues_MPISBAIJ, 1496 MatGetRow_MPISBAIJ, 1497 MatRestoreRow_MPISBAIJ, 1498 MatMult_MPISBAIJ, 1499 /* 4*/ MatMultAdd_MPISBAIJ, 1500 MatMultTranspose_MPISBAIJ, 1501 MatMultTransposeAdd_MPISBAIJ, 1502 0, 1503 0, 1504 0, 1505 /*10*/ 0, 1506 0, 1507 0, 1508 MatRelax_MPISBAIJ, 1509 MatTranspose_MPISBAIJ, 1510 /*15*/ MatGetInfo_MPISBAIJ, 1511 MatEqual_MPISBAIJ, 1512 MatGetDiagonal_MPISBAIJ, 1513 MatDiagonalScale_MPISBAIJ, 1514 MatNorm_MPISBAIJ, 1515 /*20*/ MatAssemblyBegin_MPISBAIJ, 1516 MatAssemblyEnd_MPISBAIJ, 1517 0, 1518 MatSetOption_MPISBAIJ, 1519 MatZeroEntries_MPISBAIJ, 1520 /*25*/ MatZeroRows_MPISBAIJ, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*35*/ MatDuplicate_MPISBAIJ, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*40*/ 0, 1536 MatGetSubMatrices_MPISBAIJ, 1537 MatIncreaseOverlap_MPISBAIJ, 1538 MatGetValues_MPISBAIJ, 1539 0, 1540 /*45*/ MatPrintHelp_MPISBAIJ, 1541 MatScale_MPISBAIJ, 1542 0, 1543 0, 1544 0, 1545 /*50*/ MatGetBlockSize_MPISBAIJ, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*55*/ 0, 1551 0, 1552 MatSetUnfactored_MPISBAIJ, 1553 0, 1554 MatSetValuesBlocked_MPISBAIJ, 1555 /*60*/ 0, 1556 0, 1557 0, 1558 MatGetPetscMaps_Petsc, 1559 0, 1560 /*65*/ 0, 1561 0, 1562 0, 1563 0, 1564 0, 1565 /*70*/ MatGetRowMax_MPISBAIJ, 1566 0, 1567 0, 1568 0, 1569 0, 1570 /*75*/ 0, 1571 0, 1572 0, 1573 0, 1574 0, 1575 /*80*/ 0, 1576 0, 1577 0, 1578 0, 1579 /*85*/ MatLoad_MPISBAIJ 1580 }; 1581 1582 1583 EXTERN_C_BEGIN 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1586 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1587 { 1588 PetscFunctionBegin; 1589 *a = ((Mat_MPISBAIJ *)A->data)->A; 1590 *iscopy = PETSC_FALSE; 1591 PetscFunctionReturn(0); 1592 } 1593 EXTERN_C_END 1594 1595 EXTERN_C_BEGIN 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1598 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1599 { 1600 Mat_MPISBAIJ *b; 1601 int ierr,i,mbs,Mbs; 1602 1603 PetscFunctionBegin; 1604 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1605 1606 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1607 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1608 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1609 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1610 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1611 if (d_nnz) { 1612 for (i=0; i<B->m/bs; i++) { 1613 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1614 } 1615 } 1616 if (o_nnz) { 1617 for (i=0; i<B->m/bs; i++) { 1618 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1619 } 1620 } 1621 B->preallocated = PETSC_TRUE; 1622 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1623 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1624 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1625 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1626 1627 b = (Mat_MPISBAIJ*)B->data; 1628 mbs = B->m/bs; 1629 Mbs = B->M/bs; 1630 if (mbs*bs != B->m) { 1631 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1632 } 1633 1634 b->bs = bs; 1635 b->bs2 = bs*bs; 1636 b->mbs = mbs; 1637 b->nbs = mbs; 1638 b->Mbs = Mbs; 1639 b->Nbs = Mbs; 1640 1641 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1642 b->rowners[0] = 0; 1643 for (i=2; i<=b->size; i++) { 1644 b->rowners[i] += b->rowners[i-1]; 1645 } 1646 b->rstart = b->rowners[b->rank]; 1647 b->rend = b->rowners[b->rank+1]; 1648 b->cstart = b->rstart; 1649 b->cend = b->rend; 1650 for (i=0; i<=b->size; i++) { 1651 b->rowners_bs[i] = b->rowners[i]*bs; 1652 } 1653 b->rstart_bs = b-> rstart*bs; 1654 b->rend_bs = b->rend*bs; 1655 1656 b->cstart_bs = b->cstart*bs; 1657 b->cend_bs = b->cend*bs; 1658 1659 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr); 1660 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1661 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1662 PetscLogObjectParent(B,b->A); 1663 1664 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr); 1665 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1666 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1667 PetscLogObjectParent(B,b->B); 1668 1669 /* build cache for off array entries formed */ 1670 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1671 1672 PetscFunctionReturn(0); 1673 } 1674 EXTERN_C_END 1675 1676 /*MC 1677 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1678 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1679 1680 Options Database Keys: 1681 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1682 1683 Level: beginner 1684 1685 .seealso: MatCreateMPISBAIJ 1686 M*/ 1687 1688 EXTERN_C_BEGIN 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatCreate_MPISBAIJ" 1691 int MatCreate_MPISBAIJ(Mat B) 1692 { 1693 Mat_MPISBAIJ *b; 1694 int ierr; 1695 PetscTruth flg; 1696 1697 PetscFunctionBegin; 1698 1699 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1700 B->data = (void*)b; 1701 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1702 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1703 1704 B->ops->destroy = MatDestroy_MPISBAIJ; 1705 B->ops->view = MatView_MPISBAIJ; 1706 B->mapping = 0; 1707 B->factor = 0; 1708 B->assembled = PETSC_FALSE; 1709 1710 B->insertmode = NOT_SET_VALUES; 1711 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1712 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1713 1714 /* build local table of row and column ownerships */ 1715 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1716 b->cowners = b->rowners + b->size + 2; 1717 b->rowners_bs = b->cowners + b->size + 2; 1718 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1719 1720 /* build cache for off array entries formed */ 1721 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1722 b->donotstash = PETSC_FALSE; 1723 b->colmap = PETSC_NULL; 1724 b->garray = PETSC_NULL; 1725 b->roworiented = PETSC_TRUE; 1726 1727 #if defined(PETSC_USE_MAT_SINGLE) 1728 /* stuff for MatSetValues_XXX in single precision */ 1729 b->setvalueslen = 0; 1730 b->setvaluescopy = PETSC_NULL; 1731 #endif 1732 1733 /* stuff used in block assembly */ 1734 b->barray = 0; 1735 1736 /* stuff used for matrix vector multiply */ 1737 b->lvec = 0; 1738 b->Mvctx = 0; 1739 b->slvec0 = 0; 1740 b->slvec0b = 0; 1741 b->slvec1 = 0; 1742 b->slvec1a = 0; 1743 b->slvec1b = 0; 1744 b->sMvctx = 0; 1745 1746 /* stuff for MatGetRow() */ 1747 b->rowindices = 0; 1748 b->rowvalues = 0; 1749 b->getrowactive = PETSC_FALSE; 1750 1751 /* hash table stuff */ 1752 b->ht = 0; 1753 b->hd = 0; 1754 b->ht_size = 0; 1755 b->ht_flag = PETSC_FALSE; 1756 b->ht_fact = 0; 1757 b->ht_total_ct = 0; 1758 b->ht_insert_ct = 0; 1759 1760 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1761 if (flg) { 1762 PetscReal fact = 1.39; 1763 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1764 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1765 if (fact <= 1.0) fact = 1.39; 1766 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1767 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1768 } 1769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1770 "MatStoreValues_MPISBAIJ", 1771 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1773 "MatRetrieveValues_MPISBAIJ", 1774 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1776 "MatGetDiagonalBlock_MPISBAIJ", 1777 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1779 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1780 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1781 PetscFunctionReturn(0); 1782 } 1783 EXTERN_C_END 1784 1785 /*MC 1786 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1787 1788 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1789 and MATMPISBAIJ otherwise. 1790 1791 Options Database Keys: 1792 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1793 1794 Level: beginner 1795 1796 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1797 M*/ 1798 1799 EXTERN_C_BEGIN 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatCreate_SBAIJ" 1802 int MatCreate_SBAIJ(Mat A) { 1803 int ierr,size; 1804 1805 PetscFunctionBegin; 1806 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1807 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1808 if (size == 1) { 1809 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1810 } else { 1811 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1812 } 1813 PetscFunctionReturn(0); 1814 } 1815 EXTERN_C_END 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1819 /*@C 1820 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1821 the user should preallocate the matrix storage by setting the parameters 1822 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1823 performance can be increased by more than a factor of 50. 1824 1825 Collective on Mat 1826 1827 Input Parameters: 1828 + A - the matrix 1829 . bs - size of blockk 1830 . d_nz - number of block nonzeros per block row in diagonal portion of local 1831 submatrix (same for all local rows) 1832 . d_nnz - array containing the number of block nonzeros in the various block rows 1833 in the upper triangular and diagonal part of the in diagonal portion of the local 1834 (possibly different for each block row) or PETSC_NULL. You must leave room 1835 for the diagonal entry even if it is zero. 1836 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1837 submatrix (same for all local rows). 1838 - o_nnz - array containing the number of nonzeros in the various block rows of the 1839 off-diagonal portion of the local submatrix (possibly different for 1840 each block row) or PETSC_NULL. 1841 1842 1843 Options Database Keys: 1844 . -mat_no_unroll - uses code that does not unroll the loops in the 1845 block calculations (much slower) 1846 . -mat_block_size - size of the blocks to use 1847 1848 Notes: 1849 1850 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1851 than it must be used on all processors that share the object for that argument. 1852 1853 Storage Information: 1854 For a square global matrix we define each processor's diagonal portion 1855 to be its local rows and the corresponding columns (a square submatrix); 1856 each processor's off-diagonal portion encompasses the remainder of the 1857 local matrix (a rectangular submatrix). 1858 1859 The user can specify preallocated storage for the diagonal part of 1860 the local submatrix with either d_nz or d_nnz (not both). Set 1861 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1862 memory allocation. Likewise, specify preallocated storage for the 1863 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1864 1865 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1866 the figure below we depict these three local rows and all columns (0-11). 1867 1868 .vb 1869 0 1 2 3 4 5 6 7 8 9 10 11 1870 ------------------- 1871 row 3 | o o o d d d o o o o o o 1872 row 4 | o o o d d d o o o o o o 1873 row 5 | o o o d d d o o o o o o 1874 ------------------- 1875 .ve 1876 1877 Thus, any entries in the d locations are stored in the d (diagonal) 1878 submatrix, and any entries in the o locations are stored in the 1879 o (off-diagonal) submatrix. Note that the d matrix is stored in 1880 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1881 1882 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1883 plus the diagonal part of the d matrix, 1884 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1885 In general, for PDE problems in which most nonzeros are near the diagonal, 1886 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1887 or you will get TERRIBLE performance; see the users' manual chapter on 1888 matrices. 1889 1890 Level: intermediate 1891 1892 .keywords: matrix, block, aij, compressed row, sparse, parallel 1893 1894 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1895 @*/ 1896 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1897 { 1898 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1899 1900 PetscFunctionBegin; 1901 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1902 if (f) { 1903 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatCreateMPISBAIJ" 1910 /*@C 1911 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1912 (block compressed row). For good matrix assembly performance 1913 the user should preallocate the matrix storage by setting the parameters 1914 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1915 performance can be increased by more than a factor of 50. 1916 1917 Collective on MPI_Comm 1918 1919 Input Parameters: 1920 + comm - MPI communicator 1921 . bs - size of blockk 1922 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1923 This value should be the same as the local size used in creating the 1924 y vector for the matrix-vector product y = Ax. 1925 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1926 This value should be the same as the local size used in creating the 1927 x vector for the matrix-vector product y = Ax. 1928 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1929 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1930 . d_nz - number of block nonzeros per block row in diagonal portion of local 1931 submatrix (same for all local rows) 1932 . d_nnz - array containing the number of block nonzeros in the various block rows 1933 in the upper triangular portion of the in diagonal portion of the local 1934 (possibly different for each block block row) or PETSC_NULL. 1935 You must leave room for the diagonal entry even if it is zero. 1936 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1937 submatrix (same for all local rows). 1938 - o_nnz - array containing the number of nonzeros in the various block rows of the 1939 off-diagonal portion of the local submatrix (possibly different for 1940 each block row) or PETSC_NULL. 1941 1942 Output Parameter: 1943 . A - the matrix 1944 1945 Options Database Keys: 1946 . -mat_no_unroll - uses code that does not unroll the loops in the 1947 block calculations (much slower) 1948 . -mat_block_size - size of the blocks to use 1949 . -mat_mpi - use the parallel matrix data structures even on one processor 1950 (defaults to using SeqBAIJ format on one processor) 1951 1952 Notes: 1953 The user MUST specify either the local or global matrix dimensions 1954 (possibly both). 1955 1956 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1957 than it must be used on all processors that share the object for that argument. 1958 1959 Storage Information: 1960 For a square global matrix we define each processor's diagonal portion 1961 to be its local rows and the corresponding columns (a square submatrix); 1962 each processor's off-diagonal portion encompasses the remainder of the 1963 local matrix (a rectangular submatrix). 1964 1965 The user can specify preallocated storage for the diagonal part of 1966 the local submatrix with either d_nz or d_nnz (not both). Set 1967 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1968 memory allocation. Likewise, specify preallocated storage for the 1969 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1970 1971 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1972 the figure below we depict these three local rows and all columns (0-11). 1973 1974 .vb 1975 0 1 2 3 4 5 6 7 8 9 10 11 1976 ------------------- 1977 row 3 | o o o d d d o o o o o o 1978 row 4 | o o o d d d o o o o o o 1979 row 5 | o o o d d d o o o o o o 1980 ------------------- 1981 .ve 1982 1983 Thus, any entries in the d locations are stored in the d (diagonal) 1984 submatrix, and any entries in the o locations are stored in the 1985 o (off-diagonal) submatrix. Note that the d matrix is stored in 1986 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1987 1988 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1989 plus the diagonal part of the d matrix, 1990 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1991 In general, for PDE problems in which most nonzeros are near the diagonal, 1992 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1993 or you will get TERRIBLE performance; see the users' manual chapter on 1994 matrices. 1995 1996 Level: intermediate 1997 1998 .keywords: matrix, block, aij, compressed row, sparse, parallel 1999 2000 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2001 @*/ 2002 2003 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2004 { 2005 int ierr,size; 2006 2007 PetscFunctionBegin; 2008 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2009 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2010 if (size > 1) { 2011 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2012 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2013 } else { 2014 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2015 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2016 } 2017 PetscFunctionReturn(0); 2018 } 2019 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2023 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2024 { 2025 Mat mat; 2026 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2027 int ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs; 2028 PetscScalar *array; 2029 2030 PetscFunctionBegin; 2031 *newmat = 0; 2032 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2033 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2034 2035 /* ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); */ /*-- cause error? */ 2036 mat->factor = matin->factor; 2037 mat->preallocated = PETSC_TRUE; 2038 mat->assembled = PETSC_TRUE; 2039 mat->insertmode = NOT_SET_VALUES; 2040 2041 a = (Mat_MPISBAIJ*)mat->data; 2042 a->bs = oldmat->bs; 2043 a->bs2 = oldmat->bs2; 2044 a->mbs = oldmat->mbs; 2045 a->nbs = oldmat->nbs; 2046 a->Mbs = oldmat->Mbs; 2047 a->Nbs = oldmat->Nbs; 2048 2049 a->rstart = oldmat->rstart; 2050 a->rend = oldmat->rend; 2051 a->cstart = oldmat->cstart; 2052 a->cend = oldmat->cend; 2053 a->size = oldmat->size; 2054 a->rank = oldmat->rank; 2055 a->donotstash = oldmat->donotstash; 2056 a->roworiented = oldmat->roworiented; 2057 a->rowindices = 0; 2058 a->rowvalues = 0; 2059 a->getrowactive = PETSC_FALSE; 2060 a->barray = 0; 2061 a->rstart_bs = oldmat->rstart_bs; 2062 a->rend_bs = oldmat->rend_bs; 2063 a->cstart_bs = oldmat->cstart_bs; 2064 a->cend_bs = oldmat->cend_bs; 2065 2066 /* hash table stuff */ 2067 a->ht = 0; 2068 a->hd = 0; 2069 a->ht_size = 0; 2070 a->ht_flag = oldmat->ht_flag; 2071 a->ht_fact = oldmat->ht_fact; 2072 a->ht_total_ct = 0; 2073 a->ht_insert_ct = 0; 2074 2075 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2076 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2077 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2078 if (oldmat->colmap) { 2079 #if defined (PETSC_USE_CTABLE) 2080 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2081 #else 2082 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2083 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2084 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2085 #endif 2086 } else a->colmap = 0; 2087 2088 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2089 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2090 PetscLogObjectMemory(mat,len*sizeof(int)); 2091 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2092 } else a->garray = 0; 2093 2094 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2095 PetscLogObjectParent(mat,a->lvec); 2096 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2097 PetscLogObjectParent(mat,a->Mvctx); 2098 2099 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2100 PetscLogObjectParent(mat,a->slvec0); 2101 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2102 PetscLogObjectParent(mat,a->slvec1); 2103 2104 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2105 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2106 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2107 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2108 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2109 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2110 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2111 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2112 PetscLogObjectParent(mat,a->slvec0); 2113 PetscLogObjectParent(mat,a->slvec1); 2114 PetscLogObjectParent(mat,a->slvec0b); 2115 PetscLogObjectParent(mat,a->slvec1a); 2116 PetscLogObjectParent(mat,a->slvec1b); 2117 2118 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2119 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2120 a->sMvctx = oldmat->sMvctx; 2121 PetscLogObjectParent(mat,a->sMvctx); 2122 2123 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2124 PetscLogObjectParent(mat,a->A); 2125 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2126 PetscLogObjectParent(mat,a->B); 2127 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2128 *newmat = mat; 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #include "petscsys.h" 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatLoad_MPISBAIJ" 2136 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2137 { 2138 Mat A; 2139 int i,nz,ierr,j,rstart,rend,fd; 2140 PetscScalar *vals,*buf; 2141 MPI_Comm comm = ((PetscObject)viewer)->comm; 2142 MPI_Status status; 2143 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2144 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2145 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2146 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2147 int dcount,kmax,k,nzcount,tmp; 2148 2149 PetscFunctionBegin; 2150 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2151 2152 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2153 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2154 if (!rank) { 2155 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2156 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2157 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2158 if (header[3] < 0) { 2159 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2160 } 2161 } 2162 2163 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2164 M = header[1]; N = header[2]; 2165 2166 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2167 2168 /* 2169 This code adds extra rows to make sure the number of rows is 2170 divisible by the blocksize 2171 */ 2172 Mbs = M/bs; 2173 extra_rows = bs - M + bs*(Mbs); 2174 if (extra_rows == bs) extra_rows = 0; 2175 else Mbs++; 2176 if (extra_rows &&!rank) { 2177 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2178 } 2179 2180 /* determine ownership of all rows */ 2181 mbs = Mbs/size + ((Mbs % size) > rank); 2182 m = mbs*bs; 2183 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2184 browners = rowners + size + 1; 2185 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2186 rowners[0] = 0; 2187 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2188 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2189 rstart = rowners[rank]; 2190 rend = rowners[rank+1]; 2191 2192 /* distribute row lengths to all processors */ 2193 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2194 if (!rank) { 2195 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2196 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2197 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2198 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2199 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2200 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2201 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2202 } else { 2203 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2204 } 2205 2206 if (!rank) { /* procs[0] */ 2207 /* calculate the number of nonzeros on each processor */ 2208 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2209 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2210 for (i=0; i<size; i++) { 2211 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2212 procsnz[i] += rowlengths[j]; 2213 } 2214 } 2215 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2216 2217 /* determine max buffer needed and allocate it */ 2218 maxnz = 0; 2219 for (i=0; i<size; i++) { 2220 maxnz = PetscMax(maxnz,procsnz[i]); 2221 } 2222 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2223 2224 /* read in my part of the matrix column indices */ 2225 nz = procsnz[0]; 2226 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2227 mycols = ibuf; 2228 if (size == 1) nz -= extra_rows; 2229 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2230 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2231 2232 /* read in every ones (except the last) and ship off */ 2233 for (i=1; i<size-1; i++) { 2234 nz = procsnz[i]; 2235 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2236 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2237 } 2238 /* read in the stuff for the last proc */ 2239 if (size != 1) { 2240 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2241 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2242 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2243 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2244 } 2245 ierr = PetscFree(cols);CHKERRQ(ierr); 2246 } else { /* procs[i], i>0 */ 2247 /* determine buffer space needed for message */ 2248 nz = 0; 2249 for (i=0; i<m; i++) { 2250 nz += locrowlens[i]; 2251 } 2252 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2253 mycols = ibuf; 2254 /* receive message of column indices*/ 2255 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2256 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2257 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2258 } 2259 2260 /* loop over local rows, determining number of off diagonal entries */ 2261 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2262 odlens = dlens + (rend-rstart); 2263 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2264 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2265 masked1 = mask + Mbs; 2266 masked2 = masked1 + Mbs; 2267 rowcount = 0; nzcount = 0; 2268 for (i=0; i<mbs; i++) { 2269 dcount = 0; 2270 odcount = 0; 2271 for (j=0; j<bs; j++) { 2272 kmax = locrowlens[rowcount]; 2273 for (k=0; k<kmax; k++) { 2274 tmp = mycols[nzcount++]/bs; /* block col. index */ 2275 if (!mask[tmp]) { 2276 mask[tmp] = 1; 2277 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2278 else masked1[dcount++] = tmp; /* entry in diag portion */ 2279 } 2280 } 2281 rowcount++; 2282 } 2283 2284 dlens[i] = dcount; /* d_nzz[i] */ 2285 odlens[i] = odcount; /* o_nzz[i] */ 2286 2287 /* zero out the mask elements we set */ 2288 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2289 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2290 } 2291 2292 /* create our matrix */ 2293 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2294 ierr = MatSetType(A,type);CHKERRQ(ierr); 2295 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2296 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2297 2298 if (!rank) { 2299 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2300 /* read in my part of the matrix numerical values */ 2301 nz = procsnz[0]; 2302 vals = buf; 2303 mycols = ibuf; 2304 if (size == 1) nz -= extra_rows; 2305 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2306 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2307 2308 /* insert into matrix */ 2309 jj = rstart*bs; 2310 for (i=0; i<m; i++) { 2311 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2312 mycols += locrowlens[i]; 2313 vals += locrowlens[i]; 2314 jj++; 2315 } 2316 2317 /* read in other processors (except the last one) and ship out */ 2318 for (i=1; i<size-1; i++) { 2319 nz = procsnz[i]; 2320 vals = buf; 2321 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2322 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2323 } 2324 /* the last proc */ 2325 if (size != 1){ 2326 nz = procsnz[i] - extra_rows; 2327 vals = buf; 2328 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2329 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2330 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2331 } 2332 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2333 2334 } else { 2335 /* receive numeric values */ 2336 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2337 2338 /* receive message of values*/ 2339 vals = buf; 2340 mycols = ibuf; 2341 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2342 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2343 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2344 2345 /* insert into matrix */ 2346 jj = rstart*bs; 2347 for (i=0; i<m; i++) { 2348 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2349 mycols += locrowlens[i]; 2350 vals += locrowlens[i]; 2351 jj++; 2352 } 2353 } 2354 2355 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2356 ierr = PetscFree(buf);CHKERRQ(ierr); 2357 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2358 ierr = PetscFree(rowners);CHKERRQ(ierr); 2359 ierr = PetscFree(dlens);CHKERRQ(ierr); 2360 ierr = PetscFree(mask);CHKERRQ(ierr); 2361 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2362 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2363 *newmat = A; 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2369 /*@ 2370 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2371 2372 Input Parameters: 2373 . mat - the matrix 2374 . fact - factor 2375 2376 Collective on Mat 2377 2378 Level: advanced 2379 2380 Notes: 2381 This can also be set by the command line option: -mat_use_hash_table fact 2382 2383 .keywords: matrix, hashtable, factor, HT 2384 2385 .seealso: MatSetOption() 2386 @*/ 2387 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2388 { 2389 PetscFunctionBegin; 2390 SETERRQ(1,"Function not yet written for SBAIJ format"); 2391 /* PetscFunctionReturn(0); */ 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2396 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2397 { 2398 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2399 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2400 PetscReal atmp; 2401 PetscReal *work,*svalues,*rvalues; 2402 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2403 int rank,size,*rowners_bs,dest,count,source; 2404 PetscScalar *va; 2405 MatScalar *ba; 2406 MPI_Status stat; 2407 2408 PetscFunctionBegin; 2409 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2410 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2411 2412 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2413 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2414 2415 bs = a->bs; 2416 mbs = a->mbs; 2417 Mbs = a->Mbs; 2418 ba = b->a; 2419 bi = b->i; 2420 bj = b->j; 2421 /* 2422 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2423 PetscSynchronizedFlush(A->comm); 2424 */ 2425 2426 /* find ownerships */ 2427 rowners_bs = a->rowners_bs; 2428 /* 2429 if (!rank){ 2430 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2431 } 2432 */ 2433 2434 /* each proc creates an array to be distributed */ 2435 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2436 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2437 2438 /* row_max for B */ 2439 if (rank != size-1){ 2440 for (i=0; i<mbs; i++) { 2441 ncols = bi[1] - bi[0]; bi++; 2442 brow = bs*i; 2443 for (j=0; j<ncols; j++){ 2444 bcol = bs*(*bj); 2445 for (kcol=0; kcol<bs; kcol++){ 2446 col = bcol + kcol; /* local col index */ 2447 col += rowners_bs[rank+1]; /* global col index */ 2448 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2449 for (krow=0; krow<bs; krow++){ 2450 atmp = PetscAbsScalar(*ba); ba++; 2451 row = brow + krow; /* local row index */ 2452 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2453 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2454 if (work[col] < atmp) work[col] = atmp; 2455 } 2456 } 2457 bj++; 2458 } 2459 } 2460 /* 2461 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2462 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2463 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2464 */ 2465 2466 /* send values to its owners */ 2467 for (dest=rank+1; dest<size; dest++){ 2468 svalues = work + rowners_bs[dest]; 2469 count = rowners_bs[dest+1]-rowners_bs[dest]; 2470 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2471 /* 2472 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2473 PetscSynchronizedFlush(A->comm); 2474 */ 2475 } 2476 } 2477 2478 /* receive values */ 2479 if (rank){ 2480 rvalues = work; 2481 count = rowners_bs[rank+1]-rowners_bs[rank]; 2482 for (source=0; source<rank; source++){ 2483 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2484 /* process values */ 2485 for (i=0; i<count; i++){ 2486 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2487 } 2488 /* 2489 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2490 PetscSynchronizedFlush(A->comm); 2491 */ 2492 } 2493 } 2494 2495 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2496 ierr = PetscFree(work);CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatRelax_MPISBAIJ" 2502 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2503 { 2504 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2505 int ierr,mbs=mat->mbs,bs=mat->bs; 2506 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2507 Vec bb1; 2508 2509 PetscFunctionBegin; 2510 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2511 if (bs > 1) 2512 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2513 2514 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2515 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2516 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2517 its--; 2518 } 2519 2520 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2521 while (its--){ 2522 2523 /* lower triangular part: slvec0b = - B^T*xx */ 2524 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2525 2526 /* copy xx into slvec0a */ 2527 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2528 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2529 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2530 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2531 2532 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2533 2534 /* copy bb into slvec1a */ 2535 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2536 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2537 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2538 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2539 2540 /* set slvec1b = 0 */ 2541 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2542 2543 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2544 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2545 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2546 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2547 2548 /* upper triangular part: bb1 = bb1 - B*x */ 2549 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2550 2551 /* local diagonal sweep */ 2552 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2553 } 2554 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2555 } else { 2556 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2563 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2564 { 2565 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2566 int ierr; 2567 PetscScalar mone=-1.0; 2568 Vec lvec1,bb1; 2569 2570 PetscFunctionBegin; 2571 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2572 if (mat->bs > 1) 2573 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2574 2575 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2576 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2577 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2578 its--; 2579 } 2580 2581 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2582 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2583 while (its--){ 2584 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2585 2586 /* lower diagonal part: bb1 = bb - B^T*xx */ 2587 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2588 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2589 2590 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2591 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2592 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2593 2594 /* upper diagonal part: bb1 = bb1 - B*x */ 2595 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2596 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2597 2598 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2599 2600 /* diagonal sweep */ 2601 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2602 } 2603 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2604 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2605 } else { 2606 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2607 } 2608 PetscFunctionReturn(0); 2609 } 2610 2611