1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 112 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 113 new_j = (int*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 #ifndef MatSetValues_SeqBAIJ_B_Private 155 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 188 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 189 new_j = (int*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 198 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 #endif 231 232 #if defined(PETSC_USE_MAT_SINGLE) 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValues_MPISBAIJ" 235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 238 int ierr,i,N = m*n; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 int ierr,i,N = m*n*b->bs2; 262 MatScalar *vsingle; 263 264 PetscFunctionBegin; 265 if (N > b->setvalueslen) { 266 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 267 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 268 b->setvalueslen = N; 269 } 270 vsingle = b->setvaluescopy; 271 for (i=0; i<N; i++) { 272 vsingle[i] = v[i]; 273 } 274 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 281 { 282 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 283 int ierr,i,N = m*n; 284 MatScalar *vsingle; 285 286 PetscFunctionBegin; 287 SETERRQ(1,"Function not yet written for SBAIJ format"); 288 /* PetscFunctionReturn(0); */ 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 294 { 295 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 296 int ierr,i,N = m*n*b->bs2; 297 MatScalar *vsingle; 298 299 PetscFunctionBegin; 300 SETERRQ(1,"Function not yet written for SBAIJ format"); 301 /* PetscFunctionReturn(0); */ 302 } 303 #endif 304 305 /* Only add/insert a(i,j) with i<=j (blocks). 306 Any a(i,j) with i>j input by user is ingored. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 311 { 312 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 313 MatScalar value; 314 PetscTruth roworiented = baij->roworiented; 315 int ierr,i,j,row,col; 316 int rstart_orig=baij->rstart_bs; 317 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 318 int cend_orig=baij->cend_bs,bs=baij->bs; 319 320 /* Some Variables required in the macro */ 321 Mat A = baij->A; 322 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 323 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 324 MatScalar *aa=a->a; 325 326 Mat B = baij->B; 327 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 328 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 329 MatScalar *ba=b->a; 330 331 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 332 int low,high,t,ridx,cidx,bs2=a->bs2; 333 MatScalar *ap,*bap; 334 335 /* for stash */ 336 int n_loc, *in_loc=0; 337 MatScalar *v_loc=0; 338 339 PetscFunctionBegin; 340 341 if(!baij->donotstash){ 342 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 343 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 344 } 345 346 for (i=0; i<m; i++) { 347 if (im[i] < 0) continue; 348 #if defined(PETSC_USE_BOPT_g) 349 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 350 #endif 351 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 352 row = im[i] - rstart_orig; /* local row index */ 353 for (j=0; j<n; j++) { 354 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 355 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 356 col = in[j] - cstart_orig; /* local col index */ 357 brow = row/bs; bcol = col/bs; 358 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 359 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 360 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 361 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 362 } else if (in[j] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 365 #endif 366 else { /* off-diag entry (B) */ 367 if (mat->was_assembled) { 368 if (!baij->colmap) { 369 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 370 } 371 #if defined (PETSC_USE_CTABLE) 372 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 373 col = col - 1; 374 #else 375 col = baij->colmap[in[j]/bs] - 1; 376 #endif 377 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 378 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 379 col = in[j]; 380 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 381 B = baij->B; 382 b = (Mat_SeqBAIJ*)(B)->data; 383 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 384 ba=b->a; 385 } else col += in[j]%bs; 386 } else col = in[j]; 387 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 388 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 389 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 390 } 391 } 392 } else { /* off processor entry */ 393 if (!baij->donotstash) { 394 n_loc = 0; 395 for (j=0; j<n; j++){ 396 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 397 in_loc[n_loc] = in[j]; 398 if (roworiented) { 399 v_loc[n_loc] = v[i*n+j]; 400 } else { 401 v_loc[n_loc] = v[j*m+i]; 402 } 403 n_loc++; 404 } 405 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 406 } 407 } 408 } 409 410 if(!baij->donotstash){ 411 ierr = PetscFree(in_loc);CHKERRQ(ierr); 412 ierr = PetscFree(v_loc);CHKERRQ(ierr); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 420 { 421 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 422 const MatScalar *value; 423 MatScalar *barray=baij->barray; 424 PetscTruth roworiented = baij->roworiented; 425 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 426 int rend=baij->rend,cstart=baij->cstart,stepval; 427 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 428 429 PetscFunctionBegin; 430 if(!barray) { 431 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 432 baij->barray = barray; 433 } 434 435 if (roworiented) { 436 stepval = (n-1)*bs; 437 } else { 438 stepval = (m-1)*bs; 439 } 440 for (i=0; i<m; i++) { 441 if (im[i] < 0) continue; 442 #if defined(PETSC_USE_BOPT_g) 443 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 444 #endif 445 if (im[i] >= rstart && im[i] < rend) { 446 row = im[i] - rstart; 447 for (j=0; j<n; j++) { 448 /* If NumCol = 1 then a copy is not required */ 449 if ((roworiented) && (n == 1)) { 450 barray = (MatScalar*) v + i*bs2; 451 } else if((!roworiented) && (m == 1)) { 452 barray = (MatScalar*) v + j*bs2; 453 } else { /* Here a copy is required */ 454 if (roworiented) { 455 value = v + i*(stepval+bs)*bs + j*bs; 456 } else { 457 value = v + j*(stepval+bs)*bs + i*bs; 458 } 459 for (ii=0; ii<bs; ii++,value+=stepval) { 460 for (jj=0; jj<bs; jj++) { 461 *barray++ = *value++; 462 } 463 } 464 barray -=bs2; 465 } 466 467 if (in[j] >= cstart && in[j] < cend){ 468 col = in[j] - cstart; 469 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 470 } 471 else if (in[j] < 0) continue; 472 #if defined(PETSC_USE_BOPT_g) 473 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 474 #endif 475 else { 476 if (mat->was_assembled) { 477 if (!baij->colmap) { 478 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 479 } 480 481 #if defined(PETSC_USE_BOPT_g) 482 #if defined (PETSC_USE_CTABLE) 483 { int data; 484 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 485 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 486 } 487 #else 488 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 489 #endif 490 #endif 491 #if defined (PETSC_USE_CTABLE) 492 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 493 col = (col - 1)/bs; 494 #else 495 col = (baij->colmap[in[j]] - 1)/bs; 496 #endif 497 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 498 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 499 col = in[j]; 500 } 501 } 502 else col = in[j]; 503 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 504 } 505 } 506 } else { 507 if (!baij->donotstash) { 508 if (roworiented) { 509 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 510 } else { 511 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 512 } 513 } 514 } 515 } 516 PetscFunctionReturn(0); 517 } 518 519 #define HASH_KEY 0.6180339887 520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 526 { 527 PetscFunctionBegin; 528 SETERRQ(1,"Function not yet written for SBAIJ format"); 529 /* PetscFunctionReturn(0); */ 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 535 { 536 PetscFunctionBegin; 537 SETERRQ(1,"Function not yet written for SBAIJ format"); 538 /* PetscFunctionReturn(0); */ 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatGetValues_MPISBAIJ" 543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 544 { 545 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 546 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 547 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 552 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 553 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 554 row = idxm[i] - bsrstart; 555 for (j=0; j<n; j++) { 556 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 557 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 558 if (idxn[j] >= bscstart && idxn[j] < bscend){ 559 col = idxn[j] - bscstart; 560 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 561 } else { 562 if (!baij->colmap) { 563 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 564 } 565 #if defined (PETSC_USE_CTABLE) 566 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 567 data --; 568 #else 569 data = baij->colmap[idxn[j]/bs]-1; 570 #endif 571 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 572 else { 573 col = data + idxn[j]%bs; 574 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575 } 576 } 577 } 578 } else { 579 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatNorm_MPISBAIJ" 587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 588 { 589 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 590 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 591 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 592 int ierr; 593 PetscReal sum[2],*lnorm2; 594 595 PetscFunctionBegin; 596 if (baij->size == 1) { 597 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 598 } else { 599 if (type == NORM_FROBENIUS) { 600 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 601 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 602 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 603 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 605 /* 606 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 607 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 608 */ 609 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 610 /* 611 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 612 PetscSynchronizedFlush(mat->comm); */ 613 614 *norm = sqrt(sum[0] + 2*sum[1]); 615 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 /* 624 Creates the hash table, and sets the table 625 This table is created only once. 626 If new entried need to be added to the matrix 627 then the hash table has to be destroyed and 628 recreated. 629 */ 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 633 { 634 PetscFunctionBegin; 635 SETERRQ(1,"Function not yet written for SBAIJ format"); 636 /* PetscFunctionReturn(0); */ 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 642 { 643 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 644 int ierr,nstash,reallocs; 645 InsertMode addv; 646 647 PetscFunctionBegin; 648 if (baij->donotstash) { 649 PetscFunctionReturn(0); 650 } 651 652 /* make sure all processors are either in INSERTMODE or ADDMODE */ 653 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 654 if (addv == (ADD_VALUES|INSERT_VALUES)) { 655 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 656 } 657 mat->insertmode = addv; /* in case this processor had no cache */ 658 659 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 673 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 674 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 675 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 676 int *row,*col,other_disassembled; 677 PetscTruth r1,r2,r3; 678 MatScalar *val; 679 InsertMode addv = mat->insertmode; 680 681 PetscFunctionBegin; 682 683 if (!baij->donotstash) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 /* 687 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 688 PetscSynchronizedFlush(mat->comm); 689 */ 690 if (!flg) break; 691 692 for (i=0; i<n;) { 693 /* Now identify the consecutive vals belonging to the same row */ 694 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 695 if (j < n) ncols = j-i; 696 else ncols = n-i; 697 /* Now assemble all these values with a single function call */ 698 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 699 i = j; 700 } 701 } 702 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 703 /* Now process the block-stash. Since the values are stashed column-oriented, 704 set the roworiented flag to column oriented, and after MatSetValues() 705 restore the original flags */ 706 r1 = baij->roworiented; 707 r2 = a->roworiented; 708 r3 = b->roworiented; 709 baij->roworiented = PETSC_FALSE; 710 a->roworiented = PETSC_FALSE; 711 b->roworiented = PETSC_FALSE; 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n;) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 719 if (j < n) ncols = j-i; 720 else ncols = n-i; 721 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 722 i = j; 723 } 724 } 725 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 726 baij->roworiented = r1; 727 a->roworiented = r2; 728 b->roworiented = r3; 729 } 730 731 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 741 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 747 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 748 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 749 } 750 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 752 753 #if defined(PETSC_USE_BOPT_g) 754 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 755 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 756 baij->ht_total_ct = 0; 757 baij->ht_insert_ct = 0; 758 } 759 #endif 760 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 761 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 762 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 763 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 764 } 765 766 if (baij->rowvalues) { 767 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 768 baij->rowvalues = 0; 769 } 770 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 779 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 780 PetscTruth isascii,isdraw; 781 PetscViewer sviewer; 782 PetscViewerFormat format; 783 784 PetscFunctionBegin; 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 787 if (isascii) { 788 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 789 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 790 MatInfo info; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 794 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 795 baij->bs,(int)info.memory);CHKERRQ(ierr); 796 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 797 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 798 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 799 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 800 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 801 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } else if (format == PETSC_VIEWER_ASCII_INFO) { 804 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 } 808 809 if (isdraw) { 810 PetscDraw draw; 811 PetscTruth isnull; 812 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 813 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 814 } 815 816 if (size == 1) { 817 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 818 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 819 } else { 820 /* assemble the entire matrix onto first processor. */ 821 Mat A; 822 Mat_SeqSBAIJ *Aloc; 823 Mat_SeqBAIJ *Bloc; 824 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 825 MatScalar *a; 826 827 if (!rank) { 828 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 829 } else { 830 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 831 } 832 PetscLogObjectParent(mat,A); 833 834 /* copy over the A part */ 835 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 836 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 837 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 838 839 for (i=0; i<mbs; i++) { 840 rvals[0] = bs*(baij->rstart + i); 841 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 842 for (j=ai[i]; j<ai[i+1]; j++) { 843 col = (baij->cstart+aj[j])*bs; 844 for (k=0; k<bs; k++) { 845 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 846 col++; a += bs; 847 } 848 } 849 } 850 /* copy over the B part */ 851 Bloc = (Mat_SeqBAIJ*)baij->B->data; 852 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 853 for (i=0; i<mbs; i++) { 854 rvals[0] = bs*(baij->rstart + i); 855 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 856 for (j=ai[i]; j<ai[i+1]; j++) { 857 col = baij->garray[aj[j]]*bs; 858 for (k=0; k<bs; k++) { 859 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 860 col++; a += bs; 861 } 862 } 863 } 864 ierr = PetscFree(rvals);CHKERRQ(ierr); 865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 867 /* 868 Everyone has to call to draw the matrix since the graphics waits are 869 synchronized across all processors that share the PetscDraw object 870 */ 871 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 872 if (!rank) { 873 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 874 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 875 } 876 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 877 ierr = MatDestroy(A);CHKERRQ(ierr); 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatView_MPISBAIJ" 884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 885 { 886 int ierr; 887 PetscTruth isascii,isdraw,issocket,isbinary; 888 889 PetscFunctionBegin; 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 894 if (isascii || isdraw || issocket || isbinary) { 895 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 896 } else { 897 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatDestroy_MPISBAIJ" 904 int MatDestroy_MPISBAIJ(Mat mat) 905 { 906 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 907 int ierr; 908 909 PetscFunctionBegin; 910 #if defined(PETSC_USE_LOG) 911 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 912 #endif 913 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 914 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 915 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 916 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 917 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 918 #if defined (PETSC_USE_CTABLE) 919 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 920 #else 921 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 922 #endif 923 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 924 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 925 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 926 if (baij->slvec0) { 927 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 928 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 929 } 930 if (baij->slvec1) { 931 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 932 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 933 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 934 } 935 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 936 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 937 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 938 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 939 #if defined(PETSC_USE_MAT_SINGLE) 940 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 941 #endif 942 ierr = PetscFree(baij);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatMult_MPISBAIJ" 948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 949 { 950 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 951 int ierr,nt,mbs=a->mbs,bs=a->bs; 952 PetscScalar *x,*from,zero=0.0; 953 954 PetscFunctionBegin; 955 /* 956 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 957 PetscSynchronizedFlush(A->comm); 958 */ 959 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 960 if (nt != A->n) { 961 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 962 } 963 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 964 if (nt != A->m) { 965 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 966 } 967 968 /* diagonal part */ 969 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 970 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 971 972 /* subdiagonal part */ 973 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 974 975 /* copy x into the vec slvec0 */ 976 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 977 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 978 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 979 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 980 981 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 982 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 984 985 /* supperdiagonal part */ 986 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 987 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 993 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 994 { 995 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 996 int ierr,nt; 997 998 PetscFunctionBegin; 999 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1000 if (nt != A->n) { 1001 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1002 } 1003 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1004 if (nt != A->m) { 1005 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1006 } 1007 1008 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1009 /* do diagonal part */ 1010 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1011 /* do supperdiagonal part */ 1012 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1013 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1014 /* do subdiagonal part */ 1015 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1016 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1017 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1018 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1024 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1025 { 1026 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1027 int ierr,mbs=a->mbs,bs=a->bs; 1028 PetscScalar *x,*from,zero=0.0; 1029 1030 PetscFunctionBegin; 1031 /* 1032 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1033 PetscSynchronizedFlush(A->comm); 1034 */ 1035 /* diagonal part */ 1036 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1037 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1038 1039 /* subdiagonal part */ 1040 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1041 1042 /* copy x into the vec slvec0 */ 1043 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1046 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1047 1048 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1049 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1051 1052 /* supperdiagonal part */ 1053 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1054 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1060 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1061 { 1062 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1063 int ierr; 1064 1065 PetscFunctionBegin; 1066 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1067 /* do diagonal part */ 1068 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1069 /* do supperdiagonal part */ 1070 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1071 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1072 1073 /* do subdiagonal part */ 1074 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1075 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1077 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1083 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1084 { 1085 PetscFunctionBegin; 1086 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1087 /* PetscFunctionReturn(0); */ 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1092 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1093 { 1094 PetscFunctionBegin; 1095 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1096 /* PetscFunctionReturn(0); */ 1097 } 1098 1099 /* 1100 This only works correctly for square matrices where the subblock A->A is the 1101 diagonal block 1102 */ 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1106 { 1107 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1108 int ierr; 1109 1110 PetscFunctionBegin; 1111 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1112 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatScale_MPISBAIJ" 1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1119 { 1120 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1121 int ierr; 1122 1123 PetscFunctionBegin; 1124 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1125 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1132 { 1133 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1134 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1135 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1136 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1137 int *cmap,*idx_p,cstart = mat->cstart; 1138 1139 PetscFunctionBegin; 1140 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1141 mat->getrowactive = PETSC_TRUE; 1142 1143 if (!mat->rowvalues && (idx || v)) { 1144 /* 1145 allocate enough space to hold information from the longest row. 1146 */ 1147 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1148 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1149 int max = 1,mbs = mat->mbs,tmp; 1150 for (i=0; i<mbs; i++) { 1151 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1152 if (max < tmp) { max = tmp; } 1153 } 1154 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1155 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1156 } 1157 1158 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1159 lrow = row - brstart; /* local row index */ 1160 1161 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1162 if (!v) {pvA = 0; pvB = 0;} 1163 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1164 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1165 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1166 nztot = nzA + nzB; 1167 1168 cmap = mat->garray; 1169 if (v || idx) { 1170 if (nztot) { 1171 /* Sort by increasing column numbers, assuming A and B already sorted */ 1172 int imark = -1; 1173 if (v) { 1174 *v = v_p = mat->rowvalues; 1175 for (i=0; i<nzB; i++) { 1176 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1177 else break; 1178 } 1179 imark = i; 1180 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1181 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1182 } 1183 if (idx) { 1184 *idx = idx_p = mat->rowindices; 1185 if (imark > -1) { 1186 for (i=0; i<imark; i++) { 1187 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1188 } 1189 } else { 1190 for (i=0; i<nzB; i++) { 1191 if (cmap[cworkB[i]/bs] < cstart) 1192 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1193 else break; 1194 } 1195 imark = i; 1196 } 1197 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1198 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1199 } 1200 } else { 1201 if (idx) *idx = 0; 1202 if (v) *v = 0; 1203 } 1204 } 1205 *nz = nztot; 1206 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1207 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1214 { 1215 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1216 1217 PetscFunctionBegin; 1218 if (baij->getrowactive == PETSC_FALSE) { 1219 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1220 } 1221 baij->getrowactive = PETSC_FALSE; 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1228 { 1229 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1230 1231 PetscFunctionBegin; 1232 *bs = baij->bs; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1238 int MatZeroEntries_MPISBAIJ(Mat A) 1239 { 1240 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1245 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1252 { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1254 Mat A = a->A,B = a->B; 1255 int ierr; 1256 PetscReal isend[5],irecv[5]; 1257 1258 PetscFunctionBegin; 1259 info->block_size = (PetscReal)a->bs; 1260 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1261 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1262 isend[3] = info->memory; isend[4] = info->mallocs; 1263 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1264 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1265 isend[3] += info->memory; isend[4] += info->mallocs; 1266 if (flag == MAT_LOCAL) { 1267 info->nz_used = isend[0]; 1268 info->nz_allocated = isend[1]; 1269 info->nz_unneeded = isend[2]; 1270 info->memory = isend[3]; 1271 info->mallocs = isend[4]; 1272 } else if (flag == MAT_GLOBAL_MAX) { 1273 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1274 info->nz_used = irecv[0]; 1275 info->nz_allocated = irecv[1]; 1276 info->nz_unneeded = irecv[2]; 1277 info->memory = irecv[3]; 1278 info->mallocs = irecv[4]; 1279 } else if (flag == MAT_GLOBAL_SUM) { 1280 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1281 info->nz_used = irecv[0]; 1282 info->nz_allocated = irecv[1]; 1283 info->nz_unneeded = irecv[2]; 1284 info->memory = irecv[3]; 1285 info->mallocs = irecv[4]; 1286 } else { 1287 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1288 } 1289 info->rows_global = (PetscReal)A->M; 1290 info->columns_global = (PetscReal)A->N; 1291 info->rows_local = (PetscReal)A->m; 1292 info->columns_local = (PetscReal)A->N; 1293 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1294 info->fill_ratio_needed = 0; 1295 info->factor_mallocs = 0; 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1302 { 1303 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1304 int ierr; 1305 1306 PetscFunctionBegin; 1307 switch (op) { 1308 case MAT_NO_NEW_NONZERO_LOCATIONS: 1309 case MAT_YES_NEW_NONZERO_LOCATIONS: 1310 case MAT_COLUMNS_UNSORTED: 1311 case MAT_COLUMNS_SORTED: 1312 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1313 case MAT_KEEP_ZEROED_ROWS: 1314 case MAT_NEW_NONZERO_LOCATION_ERR: 1315 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1316 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1317 break; 1318 case MAT_ROW_ORIENTED: 1319 a->roworiented = PETSC_TRUE; 1320 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1321 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1322 break; 1323 case MAT_ROWS_SORTED: 1324 case MAT_ROWS_UNSORTED: 1325 case MAT_YES_NEW_DIAGONALS: 1326 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1327 break; 1328 case MAT_COLUMN_ORIENTED: 1329 a->roworiented = PETSC_FALSE; 1330 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1331 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1332 break; 1333 case MAT_IGNORE_OFF_PROC_ENTRIES: 1334 a->donotstash = PETSC_TRUE; 1335 break; 1336 case MAT_NO_NEW_DIAGONALS: 1337 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1338 case MAT_USE_HASH_TABLE: 1339 a->ht_flag = PETSC_TRUE; 1340 break; 1341 case MAT_NOT_SYMMETRIC: 1342 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1343 case MAT_HERMITIAN: 1344 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1345 case MAT_SYMMETRIC: 1346 case MAT_STRUCTURALLY_SYMMETRIC: 1347 case MAT_NOT_HERMITIAN: 1348 case MAT_SYMMETRY_ETERNAL: 1349 case MAT_NOT_SYMMETRY_ETERNAL: 1350 break; 1351 default: 1352 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1360 { 1361 int ierr; 1362 PetscFunctionBegin; 1363 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1370 { 1371 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1372 Mat a = baij->A,b = baij->B; 1373 int ierr,s1,s2,s3; 1374 1375 PetscFunctionBegin; 1376 if (ll != rr) { 1377 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1378 } 1379 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1380 if (rr) { 1381 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1382 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1383 /* Overlap communication with computation. */ 1384 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1385 /*} if (ll) { */ 1386 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1387 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1388 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1389 /* } */ 1390 /* scale the diagonal block */ 1391 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1392 1393 /* if (rr) { */ 1394 /* Do a scatter end and then right scale the off-diagonal block */ 1395 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1396 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1397 } 1398 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1405 { 1406 PetscFunctionBegin; 1407 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1412 int MatPrintHelp_MPISBAIJ(Mat A) 1413 { 1414 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1415 MPI_Comm comm = A->comm; 1416 static int called = 0; 1417 int ierr; 1418 1419 PetscFunctionBegin; 1420 if (!a->rank) { 1421 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1422 } 1423 if (called) {PetscFunctionReturn(0);} else called = 1; 1424 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1425 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1431 int MatSetUnfactored_MPISBAIJ(Mat A) 1432 { 1433 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatEqual_MPISBAIJ" 1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1446 { 1447 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1448 Mat a,b,c,d; 1449 PetscTruth flg; 1450 int ierr; 1451 1452 PetscFunctionBegin; 1453 a = matA->A; b = matA->B; 1454 c = matB->A; d = matB->B; 1455 1456 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1457 if (flg == PETSC_TRUE) { 1458 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1459 } 1460 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1466 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1467 { 1468 int ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 /* -------------------------------------------------------------------*/ 1475 static struct _MatOps MatOps_Values = { 1476 MatSetValues_MPISBAIJ, 1477 MatGetRow_MPISBAIJ, 1478 MatRestoreRow_MPISBAIJ, 1479 MatMult_MPISBAIJ, 1480 /* 4*/ MatMultAdd_MPISBAIJ, 1481 MatMultTranspose_MPISBAIJ, 1482 MatMultTransposeAdd_MPISBAIJ, 1483 0, 1484 0, 1485 0, 1486 /*10*/ 0, 1487 0, 1488 0, 1489 MatRelax_MPISBAIJ, 1490 MatTranspose_MPISBAIJ, 1491 /*15*/ MatGetInfo_MPISBAIJ, 1492 MatEqual_MPISBAIJ, 1493 MatGetDiagonal_MPISBAIJ, 1494 MatDiagonalScale_MPISBAIJ, 1495 MatNorm_MPISBAIJ, 1496 /*20*/ MatAssemblyBegin_MPISBAIJ, 1497 MatAssemblyEnd_MPISBAIJ, 1498 0, 1499 MatSetOption_MPISBAIJ, 1500 MatZeroEntries_MPISBAIJ, 1501 /*25*/ MatZeroRows_MPISBAIJ, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1507 0, 1508 0, 1509 0, 1510 0, 1511 /*35*/ MatDuplicate_MPISBAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*40*/ 0, 1517 0, 1518 0, 1519 MatGetValues_MPISBAIJ, 1520 0, 1521 /*45*/ MatPrintHelp_MPISBAIJ, 1522 MatScale_MPISBAIJ, 1523 0, 1524 0, 1525 0, 1526 /*50*/ MatGetBlockSize_MPISBAIJ, 1527 0, 1528 0, 1529 0, 1530 0, 1531 /*55*/ 0, 1532 0, 1533 MatSetUnfactored_MPISBAIJ, 1534 0, 1535 MatSetValuesBlocked_MPISBAIJ, 1536 /*60*/ 0, 1537 0, 1538 0, 1539 MatGetPetscMaps_Petsc, 1540 0, 1541 /*65*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*70*/ MatGetRowMax_MPISBAIJ, 1547 0, 1548 0, 1549 0, 1550 0, 1551 /*75*/ 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /*80*/ 0, 1557 0, 1558 0, 1559 0, 1560 /*85*/ MatLoad_MPISBAIJ 1561 }; 1562 1563 1564 EXTERN_C_BEGIN 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1567 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1568 { 1569 PetscFunctionBegin; 1570 *a = ((Mat_MPISBAIJ *)A->data)->A; 1571 *iscopy = PETSC_FALSE; 1572 PetscFunctionReturn(0); 1573 } 1574 EXTERN_C_END 1575 1576 EXTERN_C_BEGIN 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1579 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1580 { 1581 Mat_MPISBAIJ *b; 1582 int ierr,i,mbs,Mbs; 1583 1584 PetscFunctionBegin; 1585 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1586 1587 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1588 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1589 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1590 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1591 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1592 if (d_nnz) { 1593 for (i=0; i<B->m/bs; i++) { 1594 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1595 } 1596 } 1597 if (o_nnz) { 1598 for (i=0; i<B->m/bs; i++) { 1599 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1600 } 1601 } 1602 B->preallocated = PETSC_TRUE; 1603 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1604 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1605 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1606 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1607 1608 b = (Mat_MPISBAIJ*)B->data; 1609 mbs = B->m/bs; 1610 Mbs = B->M/bs; 1611 if (mbs*bs != B->m) { 1612 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1613 } 1614 1615 b->bs = bs; 1616 b->bs2 = bs*bs; 1617 b->mbs = mbs; 1618 b->nbs = mbs; 1619 b->Mbs = Mbs; 1620 b->Nbs = Mbs; 1621 1622 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1623 b->rowners[0] = 0; 1624 for (i=2; i<=b->size; i++) { 1625 b->rowners[i] += b->rowners[i-1]; 1626 } 1627 b->rstart = b->rowners[b->rank]; 1628 b->rend = b->rowners[b->rank+1]; 1629 b->cstart = b->rstart; 1630 b->cend = b->rend; 1631 for (i=0; i<=b->size; i++) { 1632 b->rowners_bs[i] = b->rowners[i]*bs; 1633 } 1634 b->rstart_bs = b-> rstart*bs; 1635 b->rend_bs = b->rend*bs; 1636 1637 b->cstart_bs = b->cstart*bs; 1638 b->cend_bs = b->cend*bs; 1639 1640 1641 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1642 PetscLogObjectParent(B,b->A); 1643 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1644 PetscLogObjectParent(B,b->B); 1645 1646 /* build cache for off array entries formed */ 1647 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1648 1649 PetscFunctionReturn(0); 1650 } 1651 EXTERN_C_END 1652 1653 /*MC 1654 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1655 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1656 1657 Options Database Keys: 1658 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1659 1660 Level: beginner 1661 1662 .seealso: MatCreateMPISBAIJ 1663 M*/ 1664 1665 EXTERN_C_BEGIN 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatCreate_MPISBAIJ" 1668 int MatCreate_MPISBAIJ(Mat B) 1669 { 1670 Mat_MPISBAIJ *b; 1671 int ierr; 1672 PetscTruth flg; 1673 1674 PetscFunctionBegin; 1675 1676 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1677 B->data = (void*)b; 1678 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1679 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1680 1681 B->ops->destroy = MatDestroy_MPISBAIJ; 1682 B->ops->view = MatView_MPISBAIJ; 1683 B->mapping = 0; 1684 B->factor = 0; 1685 B->assembled = PETSC_FALSE; 1686 1687 B->insertmode = NOT_SET_VALUES; 1688 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1689 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1690 1691 /* build local table of row and column ownerships */ 1692 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1693 b->cowners = b->rowners + b->size + 2; 1694 b->rowners_bs = b->cowners + b->size + 2; 1695 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1696 1697 /* build cache for off array entries formed */ 1698 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1699 b->donotstash = PETSC_FALSE; 1700 b->colmap = PETSC_NULL; 1701 b->garray = PETSC_NULL; 1702 b->roworiented = PETSC_TRUE; 1703 1704 #if defined(PETSC_USE_MAT_SINGLE) 1705 /* stuff for MatSetValues_XXX in single precision */ 1706 b->setvalueslen = 0; 1707 b->setvaluescopy = PETSC_NULL; 1708 #endif 1709 1710 /* stuff used in block assembly */ 1711 b->barray = 0; 1712 1713 /* stuff used for matrix vector multiply */ 1714 b->lvec = 0; 1715 b->Mvctx = 0; 1716 b->slvec0 = 0; 1717 b->slvec0b = 0; 1718 b->slvec1 = 0; 1719 b->slvec1a = 0; 1720 b->slvec1b = 0; 1721 b->sMvctx = 0; 1722 1723 /* stuff for MatGetRow() */ 1724 b->rowindices = 0; 1725 b->rowvalues = 0; 1726 b->getrowactive = PETSC_FALSE; 1727 1728 /* hash table stuff */ 1729 b->ht = 0; 1730 b->hd = 0; 1731 b->ht_size = 0; 1732 b->ht_flag = PETSC_FALSE; 1733 b->ht_fact = 0; 1734 b->ht_total_ct = 0; 1735 b->ht_insert_ct = 0; 1736 1737 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1738 if (flg) { 1739 PetscReal fact = 1.39; 1740 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1741 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1742 if (fact <= 1.0) fact = 1.39; 1743 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1744 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1745 } 1746 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1747 "MatStoreValues_MPISBAIJ", 1748 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1750 "MatRetrieveValues_MPISBAIJ", 1751 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1752 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1753 "MatGetDiagonalBlock_MPISBAIJ", 1754 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1755 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1756 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1757 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 EXTERN_C_END 1761 1762 /*MC 1763 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1764 1765 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1766 and MATMPISBAIJ otherwise. 1767 1768 Options Database Keys: 1769 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1770 1771 Level: beginner 1772 1773 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1774 M*/ 1775 1776 EXTERN_C_BEGIN 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatCreate_SBAIJ" 1779 int MatCreate_SBAIJ(Mat A) { 1780 int ierr,size; 1781 1782 PetscFunctionBegin; 1783 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1784 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1785 if (size == 1) { 1786 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1787 } else { 1788 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1789 } 1790 PetscFunctionReturn(0); 1791 } 1792 EXTERN_C_END 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1796 /*@C 1797 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1798 the user should preallocate the matrix storage by setting the parameters 1799 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1800 performance can be increased by more than a factor of 50. 1801 1802 Collective on Mat 1803 1804 Input Parameters: 1805 + A - the matrix 1806 . bs - size of blockk 1807 . d_nz - number of block nonzeros per block row in diagonal portion of local 1808 submatrix (same for all local rows) 1809 . d_nnz - array containing the number of block nonzeros in the various block rows 1810 in the upper triangular and diagonal part of the in diagonal portion of the local 1811 (possibly different for each block row) or PETSC_NULL. You must leave room 1812 for the diagonal entry even if it is zero. 1813 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1814 submatrix (same for all local rows). 1815 - o_nnz - array containing the number of nonzeros in the various block rows of the 1816 off-diagonal portion of the local submatrix (possibly different for 1817 each block row) or PETSC_NULL. 1818 1819 1820 Options Database Keys: 1821 . -mat_no_unroll - uses code that does not unroll the loops in the 1822 block calculations (much slower) 1823 . -mat_block_size - size of the blocks to use 1824 1825 Notes: 1826 1827 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1828 than it must be used on all processors that share the object for that argument. 1829 1830 Storage Information: 1831 For a square global matrix we define each processor's diagonal portion 1832 to be its local rows and the corresponding columns (a square submatrix); 1833 each processor's off-diagonal portion encompasses the remainder of the 1834 local matrix (a rectangular submatrix). 1835 1836 The user can specify preallocated storage for the diagonal part of 1837 the local submatrix with either d_nz or d_nnz (not both). Set 1838 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1839 memory allocation. Likewise, specify preallocated storage for the 1840 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1841 1842 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1843 the figure below we depict these three local rows and all columns (0-11). 1844 1845 .vb 1846 0 1 2 3 4 5 6 7 8 9 10 11 1847 ------------------- 1848 row 3 | o o o d d d o o o o o o 1849 row 4 | o o o d d d o o o o o o 1850 row 5 | o o o d d d o o o o o o 1851 ------------------- 1852 .ve 1853 1854 Thus, any entries in the d locations are stored in the d (diagonal) 1855 submatrix, and any entries in the o locations are stored in the 1856 o (off-diagonal) submatrix. Note that the d matrix is stored in 1857 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1858 1859 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1860 plus the diagonal part of the d matrix, 1861 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1862 In general, for PDE problems in which most nonzeros are near the diagonal, 1863 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1864 or you will get TERRIBLE performance; see the users' manual chapter on 1865 matrices. 1866 1867 Level: intermediate 1868 1869 .keywords: matrix, block, aij, compressed row, sparse, parallel 1870 1871 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1872 @*/ 1873 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1874 { 1875 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1876 1877 PetscFunctionBegin; 1878 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1879 if (f) { 1880 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatCreateMPISBAIJ" 1887 /*@C 1888 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1889 (block compressed row). For good matrix assembly performance 1890 the user should preallocate the matrix storage by setting the parameters 1891 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1892 performance can be increased by more than a factor of 50. 1893 1894 Collective on MPI_Comm 1895 1896 Input Parameters: 1897 + comm - MPI communicator 1898 . bs - size of blockk 1899 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1900 This value should be the same as the local size used in creating the 1901 y vector for the matrix-vector product y = Ax. 1902 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1903 This value should be the same as the local size used in creating the 1904 x vector for the matrix-vector product y = Ax. 1905 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1906 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1907 . d_nz - number of block nonzeros per block row in diagonal portion of local 1908 submatrix (same for all local rows) 1909 . d_nnz - array containing the number of block nonzeros in the various block rows 1910 in the upper triangular portion of the in diagonal portion of the local 1911 (possibly different for each block block row) or PETSC_NULL. 1912 You must leave room for the diagonal entry even if it is zero. 1913 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1914 submatrix (same for all local rows). 1915 - o_nnz - array containing the number of nonzeros in the various block rows of the 1916 off-diagonal portion of the local submatrix (possibly different for 1917 each block row) or PETSC_NULL. 1918 1919 Output Parameter: 1920 . A - the matrix 1921 1922 Options Database Keys: 1923 . -mat_no_unroll - uses code that does not unroll the loops in the 1924 block calculations (much slower) 1925 . -mat_block_size - size of the blocks to use 1926 . -mat_mpi - use the parallel matrix data structures even on one processor 1927 (defaults to using SeqBAIJ format on one processor) 1928 1929 Notes: 1930 The user MUST specify either the local or global matrix dimensions 1931 (possibly both). 1932 1933 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1934 than it must be used on all processors that share the object for that argument. 1935 1936 Storage Information: 1937 For a square global matrix we define each processor's diagonal portion 1938 to be its local rows and the corresponding columns (a square submatrix); 1939 each processor's off-diagonal portion encompasses the remainder of the 1940 local matrix (a rectangular submatrix). 1941 1942 The user can specify preallocated storage for the diagonal part of 1943 the local submatrix with either d_nz or d_nnz (not both). Set 1944 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1945 memory allocation. Likewise, specify preallocated storage for the 1946 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1947 1948 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1949 the figure below we depict these three local rows and all columns (0-11). 1950 1951 .vb 1952 0 1 2 3 4 5 6 7 8 9 10 11 1953 ------------------- 1954 row 3 | o o o d d d o o o o o o 1955 row 4 | o o o d d d o o o o o o 1956 row 5 | o o o d d d o o o o o o 1957 ------------------- 1958 .ve 1959 1960 Thus, any entries in the d locations are stored in the d (diagonal) 1961 submatrix, and any entries in the o locations are stored in the 1962 o (off-diagonal) submatrix. Note that the d matrix is stored in 1963 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1964 1965 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1966 plus the diagonal part of the d matrix, 1967 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1968 In general, for PDE problems in which most nonzeros are near the diagonal, 1969 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1970 or you will get TERRIBLE performance; see the users' manual chapter on 1971 matrices. 1972 1973 Level: intermediate 1974 1975 .keywords: matrix, block, aij, compressed row, sparse, parallel 1976 1977 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1978 @*/ 1979 1980 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 1981 { 1982 int ierr,size; 1983 1984 PetscFunctionBegin; 1985 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1986 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1987 if (size > 1) { 1988 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1989 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1990 } else { 1991 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1992 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2000 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2001 { 2002 Mat mat; 2003 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2004 int ierr,len=0; 2005 2006 PetscFunctionBegin; 2007 *newmat = 0; 2008 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2009 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2010 mat->preallocated = PETSC_TRUE; 2011 a = (Mat_MPISBAIJ*)mat->data; 2012 a->bs = oldmat->bs; 2013 a->bs2 = oldmat->bs2; 2014 a->mbs = oldmat->mbs; 2015 a->nbs = oldmat->nbs; 2016 a->Mbs = oldmat->Mbs; 2017 a->Nbs = oldmat->Nbs; 2018 2019 a->rstart = oldmat->rstart; 2020 a->rend = oldmat->rend; 2021 a->cstart = oldmat->cstart; 2022 a->cend = oldmat->cend; 2023 a->size = oldmat->size; 2024 a->rank = oldmat->rank; 2025 a->donotstash = oldmat->donotstash; 2026 a->roworiented = oldmat->roworiented; 2027 a->rowindices = 0; 2028 a->rowvalues = 0; 2029 a->getrowactive = PETSC_FALSE; 2030 a->barray = 0; 2031 a->rstart_bs = oldmat->rstart_bs; 2032 a->rend_bs = oldmat->rend_bs; 2033 a->cstart_bs = oldmat->cstart_bs; 2034 a->cend_bs = oldmat->cend_bs; 2035 2036 /* hash table stuff */ 2037 a->ht = 0; 2038 a->hd = 0; 2039 a->ht_size = 0; 2040 a->ht_flag = oldmat->ht_flag; 2041 a->ht_fact = oldmat->ht_fact; 2042 a->ht_total_ct = 0; 2043 a->ht_insert_ct = 0; 2044 2045 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2046 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2047 a->cowners = a->rowners + a->size + 2; 2048 a->rowners_bs = a->cowners + a->size + 2; 2049 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2050 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2051 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2052 if (oldmat->colmap) { 2053 #if defined (PETSC_USE_CTABLE) 2054 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2055 #else 2056 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2057 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2058 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2059 #endif 2060 } else a->colmap = 0; 2061 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2062 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2063 PetscLogObjectMemory(mat,len*sizeof(int)); 2064 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2065 } else a->garray = 0; 2066 2067 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2068 PetscLogObjectParent(mat,a->lvec); 2069 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2070 2071 PetscLogObjectParent(mat,a->Mvctx); 2072 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2073 PetscLogObjectParent(mat,a->A); 2074 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2075 PetscLogObjectParent(mat,a->B); 2076 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2077 *newmat = mat; 2078 PetscFunctionReturn(0); 2079 } 2080 2081 #include "petscsys.h" 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatLoad_MPISBAIJ" 2085 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2086 { 2087 Mat A; 2088 int i,nz,ierr,j,rstart,rend,fd; 2089 PetscScalar *vals,*buf; 2090 MPI_Comm comm = ((PetscObject)viewer)->comm; 2091 MPI_Status status; 2092 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2093 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2094 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2095 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2096 int dcount,kmax,k,nzcount,tmp; 2097 2098 PetscFunctionBegin; 2099 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2100 2101 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2102 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2103 if (!rank) { 2104 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2105 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2106 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2107 if (header[3] < 0) { 2108 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2109 } 2110 } 2111 2112 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2113 M = header[1]; N = header[2]; 2114 2115 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2116 2117 /* 2118 This code adds extra rows to make sure the number of rows is 2119 divisible by the blocksize 2120 */ 2121 Mbs = M/bs; 2122 extra_rows = bs - M + bs*(Mbs); 2123 if (extra_rows == bs) extra_rows = 0; 2124 else Mbs++; 2125 if (extra_rows &&!rank) { 2126 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2127 } 2128 2129 /* determine ownership of all rows */ 2130 mbs = Mbs/size + ((Mbs % size) > rank); 2131 m = mbs*bs; 2132 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2133 browners = rowners + size + 1; 2134 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2135 rowners[0] = 0; 2136 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2137 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2138 rstart = rowners[rank]; 2139 rend = rowners[rank+1]; 2140 2141 /* distribute row lengths to all processors */ 2142 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2143 if (!rank) { 2144 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2145 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2146 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2147 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2148 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2149 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2150 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2151 } else { 2152 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2153 } 2154 2155 if (!rank) { /* procs[0] */ 2156 /* calculate the number of nonzeros on each processor */ 2157 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2158 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2159 for (i=0; i<size; i++) { 2160 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2161 procsnz[i] += rowlengths[j]; 2162 } 2163 } 2164 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2165 2166 /* determine max buffer needed and allocate it */ 2167 maxnz = 0; 2168 for (i=0; i<size; i++) { 2169 maxnz = PetscMax(maxnz,procsnz[i]); 2170 } 2171 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2172 2173 /* read in my part of the matrix column indices */ 2174 nz = procsnz[0]; 2175 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2176 mycols = ibuf; 2177 if (size == 1) nz -= extra_rows; 2178 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2179 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2180 2181 /* read in every ones (except the last) and ship off */ 2182 for (i=1; i<size-1; i++) { 2183 nz = procsnz[i]; 2184 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2185 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2186 } 2187 /* read in the stuff for the last proc */ 2188 if (size != 1) { 2189 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2190 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2191 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2192 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2193 } 2194 ierr = PetscFree(cols);CHKERRQ(ierr); 2195 } else { /* procs[i], i>0 */ 2196 /* determine buffer space needed for message */ 2197 nz = 0; 2198 for (i=0; i<m; i++) { 2199 nz += locrowlens[i]; 2200 } 2201 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2202 mycols = ibuf; 2203 /* receive message of column indices*/ 2204 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2205 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2206 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2207 } 2208 2209 /* loop over local rows, determining number of off diagonal entries */ 2210 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2211 odlens = dlens + (rend-rstart); 2212 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2213 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2214 masked1 = mask + Mbs; 2215 masked2 = masked1 + Mbs; 2216 rowcount = 0; nzcount = 0; 2217 for (i=0; i<mbs; i++) { 2218 dcount = 0; 2219 odcount = 0; 2220 for (j=0; j<bs; j++) { 2221 kmax = locrowlens[rowcount]; 2222 for (k=0; k<kmax; k++) { 2223 tmp = mycols[nzcount++]/bs; /* block col. index */ 2224 if (!mask[tmp]) { 2225 mask[tmp] = 1; 2226 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2227 else masked1[dcount++] = tmp; /* entry in diag portion */ 2228 } 2229 } 2230 rowcount++; 2231 } 2232 2233 dlens[i] = dcount; /* d_nzz[i] */ 2234 odlens[i] = odcount; /* o_nzz[i] */ 2235 2236 /* zero out the mask elements we set */ 2237 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2238 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2239 } 2240 2241 /* create our matrix */ 2242 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2243 ierr = MatSetType(A,type);CHKERRQ(ierr); 2244 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2245 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2246 2247 if (!rank) { 2248 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2249 /* read in my part of the matrix numerical values */ 2250 nz = procsnz[0]; 2251 vals = buf; 2252 mycols = ibuf; 2253 if (size == 1) nz -= extra_rows; 2254 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2255 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2256 2257 /* insert into matrix */ 2258 jj = rstart*bs; 2259 for (i=0; i<m; i++) { 2260 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2261 mycols += locrowlens[i]; 2262 vals += locrowlens[i]; 2263 jj++; 2264 } 2265 2266 /* read in other processors (except the last one) and ship out */ 2267 for (i=1; i<size-1; i++) { 2268 nz = procsnz[i]; 2269 vals = buf; 2270 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2271 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2272 } 2273 /* the last proc */ 2274 if (size != 1){ 2275 nz = procsnz[i] - extra_rows; 2276 vals = buf; 2277 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2278 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2279 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2280 } 2281 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2282 2283 } else { 2284 /* receive numeric values */ 2285 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2286 2287 /* receive message of values*/ 2288 vals = buf; 2289 mycols = ibuf; 2290 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2291 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2292 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2293 2294 /* insert into matrix */ 2295 jj = rstart*bs; 2296 for (i=0; i<m; i++) { 2297 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2298 mycols += locrowlens[i]; 2299 vals += locrowlens[i]; 2300 jj++; 2301 } 2302 } 2303 2304 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2305 ierr = PetscFree(buf);CHKERRQ(ierr); 2306 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2307 ierr = PetscFree(rowners);CHKERRQ(ierr); 2308 ierr = PetscFree(dlens);CHKERRQ(ierr); 2309 ierr = PetscFree(mask);CHKERRQ(ierr); 2310 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2311 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 *newmat = A; 2313 PetscFunctionReturn(0); 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2318 /*@ 2319 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2320 2321 Input Parameters: 2322 . mat - the matrix 2323 . fact - factor 2324 2325 Collective on Mat 2326 2327 Level: advanced 2328 2329 Notes: 2330 This can also be set by the command line option: -mat_use_hash_table fact 2331 2332 .keywords: matrix, hashtable, factor, HT 2333 2334 .seealso: MatSetOption() 2335 @*/ 2336 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2337 { 2338 PetscFunctionBegin; 2339 SETERRQ(1,"Function not yet written for SBAIJ format"); 2340 /* PetscFunctionReturn(0); */ 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2345 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2346 { 2347 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2348 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2349 PetscReal atmp; 2350 PetscReal *work,*svalues,*rvalues; 2351 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2352 int rank,size,*rowners_bs,dest,count,source; 2353 PetscScalar *va; 2354 MatScalar *ba; 2355 MPI_Status stat; 2356 2357 PetscFunctionBegin; 2358 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2359 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2360 2361 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2362 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2363 2364 bs = a->bs; 2365 mbs = a->mbs; 2366 Mbs = a->Mbs; 2367 ba = b->a; 2368 bi = b->i; 2369 bj = b->j; 2370 /* 2371 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2372 PetscSynchronizedFlush(A->comm); 2373 */ 2374 2375 /* find ownerships */ 2376 rowners_bs = a->rowners_bs; 2377 /* 2378 if (!rank){ 2379 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2380 } 2381 */ 2382 2383 /* each proc creates an array to be distributed */ 2384 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2385 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2386 2387 /* row_max for B */ 2388 if (rank != size-1){ 2389 for (i=0; i<mbs; i++) { 2390 ncols = bi[1] - bi[0]; bi++; 2391 brow = bs*i; 2392 for (j=0; j<ncols; j++){ 2393 bcol = bs*(*bj); 2394 for (kcol=0; kcol<bs; kcol++){ 2395 col = bcol + kcol; /* local col index */ 2396 col += rowners_bs[rank+1]; /* global col index */ 2397 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2398 for (krow=0; krow<bs; krow++){ 2399 atmp = PetscAbsScalar(*ba); ba++; 2400 row = brow + krow; /* local row index */ 2401 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2402 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2403 if (work[col] < atmp) work[col] = atmp; 2404 } 2405 } 2406 bj++; 2407 } 2408 } 2409 /* 2410 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2411 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2412 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2413 */ 2414 2415 /* send values to its owners */ 2416 for (dest=rank+1; dest<size; dest++){ 2417 svalues = work + rowners_bs[dest]; 2418 count = rowners_bs[dest+1]-rowners_bs[dest]; 2419 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2420 /* 2421 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2422 PetscSynchronizedFlush(A->comm); 2423 */ 2424 } 2425 } 2426 2427 /* receive values */ 2428 if (rank){ 2429 rvalues = work; 2430 count = rowners_bs[rank+1]-rowners_bs[rank]; 2431 for (source=0; source<rank; source++){ 2432 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2433 /* process values */ 2434 for (i=0; i<count; i++){ 2435 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2436 } 2437 /* 2438 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2439 PetscSynchronizedFlush(A->comm); 2440 */ 2441 } 2442 } 2443 2444 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2445 ierr = PetscFree(work);CHKERRQ(ierr); 2446 PetscFunctionReturn(0); 2447 } 2448 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatRelax_MPISBAIJ" 2451 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2452 { 2453 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2454 int ierr,mbs=mat->mbs,bs=mat->bs; 2455 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2456 Vec bb1; 2457 2458 PetscFunctionBegin; 2459 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2460 if (bs > 1) 2461 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2462 2463 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2464 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2465 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2466 its--; 2467 } 2468 2469 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2470 while (its--){ 2471 2472 /* lower triangular part: slvec0b = - B^T*xx */ 2473 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2474 2475 /* copy xx into slvec0a */ 2476 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2477 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2478 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2479 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2480 2481 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2482 2483 /* copy bb into slvec1a */ 2484 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2485 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2486 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2487 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2488 2489 /* set slvec1b = 0 */ 2490 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2491 2492 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2493 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2494 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2495 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2496 2497 /* upper triangular part: bb1 = bb1 - B*x */ 2498 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2499 2500 /* local diagonal sweep */ 2501 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2502 } 2503 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2504 } else { 2505 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2506 } 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2512 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2513 { 2514 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2515 int ierr; 2516 PetscScalar mone=-1.0; 2517 Vec lvec1,bb1; 2518 2519 PetscFunctionBegin; 2520 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2521 if (mat->bs > 1) 2522 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2523 2524 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2525 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2526 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2527 its--; 2528 } 2529 2530 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2531 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2532 while (its--){ 2533 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2534 2535 /* lower diagonal part: bb1 = bb - B^T*xx */ 2536 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2537 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2538 2539 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2540 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2541 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2542 2543 /* upper diagonal part: bb1 = bb1 - B*x */ 2544 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2545 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2546 2547 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2548 2549 /* diagonal sweep */ 2550 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2551 } 2552 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2553 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2554 } else { 2555 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560