1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "mpisbaij.h" 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 7 extern int MatSetUpMultiply_MPISBAIJ(Mat); 8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 11 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 22 23 /* UGLY, ugly, ugly 24 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 25 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 26 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 27 converts the entries into single precision and then calls ..._MatScalar() to put them 28 into the single precision data structures. 29 */ 30 #if defined(PETSC_USE_MAT_SINGLE) 31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 #else 37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 38 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 40 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 42 #endif 43 44 EXTERN_C_BEGIN 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 47 int MatStoreValues_MPISBAIJ(Mat mat) 48 { 49 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 50 int ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 EXTERN_C_END 58 59 EXTERN_C_BEGIN 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 62 int MatRetrieveValues_MPISBAIJ(Mat mat) 63 { 64 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 65 int ierr; 66 67 PetscFunctionBegin; 68 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 69 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 74 75 #define CHUNKSIZE 10 76 77 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 78 { \ 79 \ 80 brow = row/bs; \ 81 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 82 rmax = aimax[brow]; nrow = ailen[brow]; \ 83 bcol = col/bs; \ 84 ridx = row % bs; cidx = col % bs; \ 85 low = 0; high = nrow; \ 86 while (high-low > 3) { \ 87 t = (low+high)/2; \ 88 if (rp[t] > bcol) high = t; \ 89 else low = t; \ 90 } \ 91 for (_i=low; _i<high; _i++) { \ 92 if (rp[_i] > bcol) break; \ 93 if (rp[_i] == bcol) { \ 94 bap = ap + bs2*_i + bs*cidx + ridx; \ 95 if (addv == ADD_VALUES) *bap += value; \ 96 else *bap = value; \ 97 goto a_noinsert; \ 98 } \ 99 } \ 100 if (a->nonew == 1) goto a_noinsert; \ 101 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 102 if (nrow >= rmax) { \ 103 /* there is no extra room in row, therefore enlarge */ \ 104 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 105 MatScalar *new_a; \ 106 \ 107 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 108 \ 109 /* malloc new storage space */ \ 110 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 111 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 112 new_j = (int*)(new_a + bs2*new_nz); \ 113 new_i = new_j + new_nz; \ 114 \ 115 /* copy over old data into new slots */ \ 116 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 117 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 118 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 119 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 120 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 121 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 124 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 125 /* free up old matrix storage */ \ 126 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 127 if (!a->singlemalloc) { \ 128 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 129 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 130 } \ 131 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 132 a->singlemalloc = PETSC_TRUE; \ 133 \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 136 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 137 a->maxnz += bs2*CHUNKSIZE; \ 138 a->reallocs++; \ 139 a->nz++; \ 140 } \ 141 N = nrow++ - 1; \ 142 /* shift up all the later entries in this row */ \ 143 for (ii=N; ii>=_i; ii--) { \ 144 rp[ii+1] = rp[ii]; \ 145 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 146 } \ 147 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 148 rp[_i] = bcol; \ 149 ap[bs2*_i + bs*cidx + ridx] = value; \ 150 a_noinsert:; \ 151 ailen[brow] = nrow; \ 152 } 153 #ifndef MatSetValues_SeqBAIJ_B_Private 154 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 155 { \ 156 brow = row/bs; \ 157 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 158 rmax = bimax[brow]; nrow = bilen[brow]; \ 159 bcol = col/bs; \ 160 ridx = row % bs; cidx = col % bs; \ 161 low = 0; high = nrow; \ 162 while (high-low > 3) { \ 163 t = (low+high)/2; \ 164 if (rp[t] > bcol) high = t; \ 165 else low = t; \ 166 } \ 167 for (_i=low; _i<high; _i++) { \ 168 if (rp[_i] > bcol) break; \ 169 if (rp[_i] == bcol) { \ 170 bap = ap + bs2*_i + bs*cidx + ridx; \ 171 if (addv == ADD_VALUES) *bap += value; \ 172 else *bap = value; \ 173 goto b_noinsert; \ 174 } \ 175 } \ 176 if (b->nonew == 1) goto b_noinsert; \ 177 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 178 if (nrow >= rmax) { \ 179 /* there is no extra room in row, therefore enlarge */ \ 180 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 181 MatScalar *new_a; \ 182 \ 183 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 184 \ 185 /* malloc new storage space */ \ 186 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 187 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 188 new_j = (int*)(new_a + bs2*new_nz); \ 189 new_i = new_j + new_nz; \ 190 \ 191 /* copy over old data into new slots */ \ 192 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 193 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 194 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 195 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 196 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 197 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 200 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 201 /* free up old matrix storage */ \ 202 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 203 if (!b->singlemalloc) { \ 204 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 205 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 206 } \ 207 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 208 b->singlemalloc = PETSC_TRUE; \ 209 \ 210 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 211 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 212 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 213 b->maxnz += bs2*CHUNKSIZE; \ 214 b->reallocs++; \ 215 b->nz++; \ 216 } \ 217 N = nrow++ - 1; \ 218 /* shift up all the later entries in this row */ \ 219 for (ii=N; ii>=_i; ii--) { \ 220 rp[ii+1] = rp[ii]; \ 221 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 222 } \ 223 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 224 rp[_i] = bcol; \ 225 ap[bs2*_i + bs*cidx + ridx] = value; \ 226 b_noinsert:; \ 227 bilen[brow] = nrow; \ 228 } 229 #endif 230 231 #if defined(PETSC_USE_MAT_SINGLE) 232 #undef __FUNCT__ 233 #define __FUNCT__ "MatSetValues_MPISBAIJ" 234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 235 { 236 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 237 int ierr,i,N = m*n; 238 MatScalar *vsingle; 239 240 PetscFunctionBegin; 241 if (N > b->setvalueslen) { 242 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 243 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 244 b->setvalueslen = N; 245 } 246 vsingle = b->setvaluescopy; 247 248 for (i=0; i<N; i++) { 249 vsingle[i] = v[i]; 250 } 251 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 257 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 258 { 259 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 260 int ierr,i,N = m*n*b->bs2; 261 MatScalar *vsingle; 262 263 PetscFunctionBegin; 264 if (N > b->setvalueslen) { 265 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 266 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 267 b->setvalueslen = N; 268 } 269 vsingle = b->setvaluescopy; 270 for (i=0; i<N; i++) { 271 vsingle[i] = v[i]; 272 } 273 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 279 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 280 { 281 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 282 int ierr,i,N = m*n; 283 MatScalar *vsingle; 284 285 PetscFunctionBegin; 286 SETERRQ(1,"Function not yet written for SBAIJ format"); 287 /* PetscFunctionReturn(0); */ 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 292 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n*b->bs2; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 #endif 303 304 /* Only add/insert a(i,j) with i<=j (blocks). 305 Any a(i,j) with i>j input by user is ingored. 306 */ 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 309 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 310 { 311 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 312 MatScalar value; 313 PetscTruth roworiented = baij->roworiented; 314 int ierr,i,j,row,col; 315 int rstart_orig=baij->rstart_bs; 316 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 317 int cend_orig=baij->cend_bs,bs=baij->bs; 318 319 /* Some Variables required in the macro */ 320 Mat A = baij->A; 321 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 322 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 323 MatScalar *aa=a->a; 324 325 Mat B = baij->B; 326 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 327 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 328 MatScalar *ba=b->a; 329 330 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 331 int low,high,t,ridx,cidx,bs2=a->bs2; 332 MatScalar *ap,*bap; 333 334 /* for stash */ 335 int n_loc, *in_loc=0; 336 MatScalar *v_loc=0; 337 338 PetscFunctionBegin; 339 340 if(!baij->donotstash){ 341 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 342 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 343 } 344 345 for (i=0; i<m; i++) { 346 if (im[i] < 0) continue; 347 #if defined(PETSC_USE_BOPT_g) 348 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 349 #endif 350 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 351 row = im[i] - rstart_orig; /* local row index */ 352 for (j=0; j<n; j++) { 353 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 354 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 355 col = in[j] - cstart_orig; /* local col index */ 356 brow = row/bs; bcol = col/bs; 357 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 358 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 359 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 360 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 361 } else if (in[j] < 0) continue; 362 #if defined(PETSC_USE_BOPT_g) 363 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 364 #endif 365 else { /* off-diag entry (B) */ 366 if (mat->was_assembled) { 367 if (!baij->colmap) { 368 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 369 } 370 #if defined (PETSC_USE_CTABLE) 371 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 372 col = col - 1; 373 #else 374 col = baij->colmap[in[j]/bs] - 1; 375 #endif 376 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 377 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 378 col = in[j]; 379 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 380 B = baij->B; 381 b = (Mat_SeqBAIJ*)(B)->data; 382 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 383 ba=b->a; 384 } else col += in[j]%bs; 385 } else col = in[j]; 386 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 387 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 388 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 389 } 390 } 391 } else { /* off processor entry */ 392 if (!baij->donotstash) { 393 n_loc = 0; 394 for (j=0; j<n; j++){ 395 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 396 in_loc[n_loc] = in[j]; 397 if (roworiented) { 398 v_loc[n_loc] = v[i*n+j]; 399 } else { 400 v_loc[n_loc] = v[j*m+i]; 401 } 402 n_loc++; 403 } 404 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 405 } 406 } 407 } 408 409 if(!baij->donotstash){ 410 ierr = PetscFree(in_loc);CHKERRQ(ierr); 411 ierr = PetscFree(v_loc);CHKERRQ(ierr); 412 } 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 419 { 420 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 421 const MatScalar *value; 422 MatScalar *barray=baij->barray; 423 PetscTruth roworiented = baij->roworiented; 424 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 425 int rend=baij->rend,cstart=baij->cstart,stepval; 426 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 427 428 PetscFunctionBegin; 429 if(!barray) { 430 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 431 baij->barray = barray; 432 } 433 434 if (roworiented) { 435 stepval = (n-1)*bs; 436 } else { 437 stepval = (m-1)*bs; 438 } 439 for (i=0; i<m; i++) { 440 if (im[i] < 0) continue; 441 #if defined(PETSC_USE_BOPT_g) 442 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 443 #endif 444 if (im[i] >= rstart && im[i] < rend) { 445 row = im[i] - rstart; 446 for (j=0; j<n; j++) { 447 /* If NumCol = 1 then a copy is not required */ 448 if ((roworiented) && (n == 1)) { 449 barray = (MatScalar*) v + i*bs2; 450 } else if((!roworiented) && (m == 1)) { 451 barray = (MatScalar*) v + j*bs2; 452 } else { /* Here a copy is required */ 453 if (roworiented) { 454 value = v + i*(stepval+bs)*bs + j*bs; 455 } else { 456 value = v + j*(stepval+bs)*bs + i*bs; 457 } 458 for (ii=0; ii<bs; ii++,value+=stepval) { 459 for (jj=0; jj<bs; jj++) { 460 *barray++ = *value++; 461 } 462 } 463 barray -=bs2; 464 } 465 466 if (in[j] >= cstart && in[j] < cend){ 467 col = in[j] - cstart; 468 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 469 } 470 else if (in[j] < 0) continue; 471 #if defined(PETSC_USE_BOPT_g) 472 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 473 #endif 474 else { 475 if (mat->was_assembled) { 476 if (!baij->colmap) { 477 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 478 } 479 480 #if defined(PETSC_USE_BOPT_g) 481 #if defined (PETSC_USE_CTABLE) 482 { int data; 483 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 484 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 485 } 486 #else 487 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 488 #endif 489 #endif 490 #if defined (PETSC_USE_CTABLE) 491 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 492 col = (col - 1)/bs; 493 #else 494 col = (baij->colmap[in[j]] - 1)/bs; 495 #endif 496 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 497 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 498 col = in[j]; 499 } 500 } 501 else col = in[j]; 502 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 503 } 504 } 505 } else { 506 if (!baij->donotstash) { 507 if (roworiented) { 508 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 509 } else { 510 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 511 } 512 } 513 } 514 } 515 PetscFunctionReturn(0); 516 } 517 518 #define HASH_KEY 0.6180339887 519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 525 { 526 PetscFunctionBegin; 527 SETERRQ(1,"Function not yet written for SBAIJ format"); 528 /* PetscFunctionReturn(0); */ 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 534 { 535 PetscFunctionBegin; 536 SETERRQ(1,"Function not yet written for SBAIJ format"); 537 /* PetscFunctionReturn(0); */ 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatGetValues_MPISBAIJ" 542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 543 { 544 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 545 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 546 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 547 548 PetscFunctionBegin; 549 for (i=0; i<m; i++) { 550 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 551 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 552 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 553 row = idxm[i] - bsrstart; 554 for (j=0; j<n; j++) { 555 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 556 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 557 if (idxn[j] >= bscstart && idxn[j] < bscend){ 558 col = idxn[j] - bscstart; 559 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 560 } else { 561 if (!baij->colmap) { 562 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 563 } 564 #if defined (PETSC_USE_CTABLE) 565 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 566 data --; 567 #else 568 data = baij->colmap[idxn[j]/bs]-1; 569 #endif 570 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 571 else { 572 col = data + idxn[j]%bs; 573 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 574 } 575 } 576 } 577 } else { 578 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 579 } 580 } 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatNorm_MPISBAIJ" 586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 587 { 588 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 589 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 590 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 591 int ierr; 592 PetscReal sum[2],*lnorm2; 593 594 PetscFunctionBegin; 595 if (baij->size == 1) { 596 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 597 } else { 598 if (type == NORM_FROBENIUS) { 599 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 600 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 601 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 602 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 603 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 604 /* 605 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 606 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 607 */ 608 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 609 /* 610 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 611 PetscSynchronizedFlush(mat->comm); */ 612 613 *norm = sqrt(sum[0] + 2*sum[1]); 614 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 615 } else { 616 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 /* 623 Creates the hash table, and sets the table 624 This table is created only once. 625 If new entried need to be added to the matrix 626 then the hash table has to be destroyed and 627 recreated. 628 */ 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 631 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 632 { 633 PetscFunctionBegin; 634 SETERRQ(1,"Function not yet written for SBAIJ format"); 635 /* PetscFunctionReturn(0); */ 636 } 637 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 640 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 641 { 642 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 643 int ierr,nstash,reallocs; 644 InsertMode addv; 645 646 PetscFunctionBegin; 647 if (baij->donotstash) { 648 PetscFunctionReturn(0); 649 } 650 651 /* make sure all processors are either in INSERTMODE or ADDMODE */ 652 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 653 if (addv == (ADD_VALUES|INSERT_VALUES)) { 654 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 655 } 656 mat->insertmode = addv; /* in case this processor had no cache */ 657 658 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 659 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 660 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 661 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 662 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 663 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 669 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 670 { 671 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 672 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 673 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 674 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 675 int *row,*col,other_disassembled; 676 PetscTruth r1,r2,r3; 677 MatScalar *val; 678 InsertMode addv = mat->insertmode; 679 680 PetscFunctionBegin; 681 682 if (!baij->donotstash) { 683 while (1) { 684 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 685 /* 686 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 687 PetscSynchronizedFlush(mat->comm); 688 */ 689 if (!flg) break; 690 691 for (i=0; i<n;) { 692 /* Now identify the consecutive vals belonging to the same row */ 693 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 694 if (j < n) ncols = j-i; 695 else ncols = n-i; 696 /* Now assemble all these values with a single function call */ 697 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 698 i = j; 699 } 700 } 701 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 702 /* Now process the block-stash. Since the values are stashed column-oriented, 703 set the roworiented flag to column oriented, and after MatSetValues() 704 restore the original flags */ 705 r1 = baij->roworiented; 706 r2 = a->roworiented; 707 r3 = b->roworiented; 708 baij->roworiented = PETSC_FALSE; 709 a->roworiented = PETSC_FALSE; 710 b->roworiented = PETSC_FALSE; 711 while (1) { 712 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 713 if (!flg) break; 714 715 for (i=0; i<n;) { 716 /* Now identify the consecutive vals belonging to the same row */ 717 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 718 if (j < n) ncols = j-i; 719 else ncols = n-i; 720 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 721 i = j; 722 } 723 } 724 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 725 baij->roworiented = r1; 726 a->roworiented = r2; 727 b->roworiented = r3; 728 } 729 730 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 731 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 732 733 /* determine if any processor has disassembled, if so we must 734 also disassemble ourselfs, in order that we may reassemble. */ 735 /* 736 if nonzero structure of submatrix B cannot change then we know that 737 no processor disassembled thus we can skip this stuff 738 */ 739 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 740 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 741 if (mat->was_assembled && !other_disassembled) { 742 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 743 } 744 } 745 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 748 } 749 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 750 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 751 752 #if defined(PETSC_USE_BOPT_g) 753 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 754 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 755 baij->ht_total_ct = 0; 756 baij->ht_insert_ct = 0; 757 } 758 #endif 759 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 760 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 761 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 762 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 763 } 764 765 if (baij->rowvalues) { 766 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 767 baij->rowvalues = 0; 768 } 769 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 775 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 776 { 777 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 778 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 779 PetscTruth isascii,isdraw; 780 PetscViewer sviewer; 781 PetscViewerFormat format; 782 783 PetscFunctionBegin; 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 786 if (isascii) { 787 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 788 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 789 MatInfo info; 790 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 791 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 792 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 793 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 794 baij->bs,(int)info.memory);CHKERRQ(ierr); 795 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 796 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 797 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 798 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 799 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 800 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } else if (format == PETSC_VIEWER_ASCII_INFO) { 803 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 804 PetscFunctionReturn(0); 805 } 806 } 807 808 if (isdraw) { 809 PetscDraw draw; 810 PetscTruth isnull; 811 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 812 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 813 } 814 815 if (size == 1) { 816 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 817 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 818 } else { 819 /* assemble the entire matrix onto first processor. */ 820 Mat A; 821 Mat_SeqSBAIJ *Aloc; 822 Mat_SeqBAIJ *Bloc; 823 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 824 MatScalar *a; 825 826 if (!rank) { 827 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 828 } else { 829 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 830 } 831 PetscLogObjectParent(mat,A); 832 833 /* copy over the A part */ 834 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 835 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 836 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 837 838 for (i=0; i<mbs; i++) { 839 rvals[0] = bs*(baij->rstart + i); 840 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 841 for (j=ai[i]; j<ai[i+1]; j++) { 842 col = (baij->cstart+aj[j])*bs; 843 for (k=0; k<bs; k++) { 844 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 845 col++; a += bs; 846 } 847 } 848 } 849 /* copy over the B part */ 850 Bloc = (Mat_SeqBAIJ*)baij->B->data; 851 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 852 for (i=0; i<mbs; i++) { 853 rvals[0] = bs*(baij->rstart + i); 854 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 855 for (j=ai[i]; j<ai[i+1]; j++) { 856 col = baij->garray[aj[j]]*bs; 857 for (k=0; k<bs; k++) { 858 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 859 col++; a += bs; 860 } 861 } 862 } 863 ierr = PetscFree(rvals);CHKERRQ(ierr); 864 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 865 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 /* 867 Everyone has to call to draw the matrix since the graphics waits are 868 synchronized across all processors that share the PetscDraw object 869 */ 870 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 871 if (!rank) { 872 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 873 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 874 } 875 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 876 ierr = MatDestroy(A);CHKERRQ(ierr); 877 } 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatView_MPISBAIJ" 883 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 884 { 885 int ierr; 886 PetscTruth isascii,isdraw,issocket,isbinary; 887 888 PetscFunctionBegin; 889 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 893 if (isascii || isdraw || issocket || isbinary) { 894 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 895 } else { 896 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 897 } 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatDestroy_MPISBAIJ" 903 int MatDestroy_MPISBAIJ(Mat mat) 904 { 905 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 906 int ierr; 907 908 PetscFunctionBegin; 909 #if defined(PETSC_USE_LOG) 910 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 911 #endif 912 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 913 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 914 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 915 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 916 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 917 #if defined (PETSC_USE_CTABLE) 918 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 919 #else 920 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 921 #endif 922 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 923 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 924 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 925 if (baij->slvec0) { 926 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 927 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 928 } 929 if (baij->slvec1) { 930 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 931 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 932 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 933 } 934 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 935 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 936 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 937 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 938 #if defined(PETSC_USE_MAT_SINGLE) 939 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 940 #endif 941 ierr = PetscFree(baij);CHKERRQ(ierr); 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMult_MPISBAIJ" 947 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 948 { 949 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 950 int ierr,nt,mbs=a->mbs,bs=a->bs; 951 PetscScalar *x,*from,zero=0.0; 952 953 PetscFunctionBegin; 954 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 955 if (nt != A->n) { 956 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 957 } 958 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 959 if (nt != A->m) { 960 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 961 } 962 963 /* diagonal part */ 964 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 965 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 966 967 /* subdiagonal part */ 968 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 969 970 /* copy x into the vec slvec0 */ 971 ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr); 972 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 973 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 974 ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr); 975 976 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 977 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 978 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 979 980 /* supperdiagonal part */ 981 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 982 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 988 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 989 { 990 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 991 int ierr,nt; 992 993 PetscFunctionBegin; 994 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 995 if (nt != A->n) { 996 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 997 } 998 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 999 if (nt != A->m) { 1000 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1001 } 1002 1003 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1004 /* do diagonal part */ 1005 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1006 /* do supperdiagonal part */ 1007 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1008 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1009 /* do subdiagonal part */ 1010 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1011 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1012 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1013 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1019 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1020 { 1021 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1022 int ierr,mbs=a->mbs,bs=a->bs; 1023 PetscScalar *x,*from,zero=0.0; 1024 1025 PetscFunctionBegin; 1026 /* 1027 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1028 PetscSynchronizedFlush(A->comm); 1029 */ 1030 /* diagonal part */ 1031 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1032 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1033 1034 /* subdiagonal part */ 1035 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1036 1037 /* copy x into the vec slvec0 */ 1038 ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr); 1039 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1040 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1041 ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr); 1042 1043 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1044 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1045 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1046 1047 /* supperdiagonal part */ 1048 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1049 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1055 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1056 { 1057 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1058 int ierr; 1059 1060 PetscFunctionBegin; 1061 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1062 /* do diagonal part */ 1063 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1064 /* do supperdiagonal part */ 1065 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1066 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1067 1068 /* do subdiagonal part */ 1069 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1070 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1071 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1072 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1078 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1079 { 1080 int ierr; 1081 1082 PetscFunctionBegin; 1083 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1089 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1090 { 1091 int ierr; 1092 1093 PetscFunctionBegin; 1094 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 /* 1099 This only works correctly for square matrices where the subblock A->A is the 1100 diagonal block 1101 */ 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1104 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1105 { 1106 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1107 int ierr; 1108 1109 PetscFunctionBegin; 1110 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1111 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatScale_MPISBAIJ" 1117 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1118 { 1119 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1120 int ierr; 1121 1122 PetscFunctionBegin; 1123 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1124 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1130 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1131 { 1132 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1133 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1134 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1135 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1136 int *cmap,*idx_p,cstart = mat->cstart; 1137 1138 PetscFunctionBegin; 1139 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1140 mat->getrowactive = PETSC_TRUE; 1141 1142 if (!mat->rowvalues && (idx || v)) { 1143 /* 1144 allocate enough space to hold information from the longest row. 1145 */ 1146 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1147 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1148 int max = 1,mbs = mat->mbs,tmp; 1149 for (i=0; i<mbs; i++) { 1150 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1151 if (max < tmp) { max = tmp; } 1152 } 1153 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1154 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1155 } 1156 1157 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1158 lrow = row - brstart; /* local row index */ 1159 1160 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1161 if (!v) {pvA = 0; pvB = 0;} 1162 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1163 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1164 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1165 nztot = nzA + nzB; 1166 1167 cmap = mat->garray; 1168 if (v || idx) { 1169 if (nztot) { 1170 /* Sort by increasing column numbers, assuming A and B already sorted */ 1171 int imark = -1; 1172 if (v) { 1173 *v = v_p = mat->rowvalues; 1174 for (i=0; i<nzB; i++) { 1175 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1176 else break; 1177 } 1178 imark = i; 1179 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1180 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1181 } 1182 if (idx) { 1183 *idx = idx_p = mat->rowindices; 1184 if (imark > -1) { 1185 for (i=0; i<imark; i++) { 1186 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1187 } 1188 } else { 1189 for (i=0; i<nzB; i++) { 1190 if (cmap[cworkB[i]/bs] < cstart) 1191 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1192 else break; 1193 } 1194 imark = i; 1195 } 1196 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1197 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1198 } 1199 } else { 1200 if (idx) *idx = 0; 1201 if (v) *v = 0; 1202 } 1203 } 1204 *nz = nztot; 1205 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1206 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1212 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1213 { 1214 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1215 1216 PetscFunctionBegin; 1217 if (baij->getrowactive == PETSC_FALSE) { 1218 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1219 } 1220 baij->getrowactive = PETSC_FALSE; 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1226 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1227 { 1228 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1229 1230 PetscFunctionBegin; 1231 *bs = baij->bs; 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1237 int MatZeroEntries_MPISBAIJ(Mat A) 1238 { 1239 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1240 int ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1244 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1250 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1251 { 1252 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1253 Mat A = a->A,B = a->B; 1254 int ierr; 1255 PetscReal isend[5],irecv[5]; 1256 1257 PetscFunctionBegin; 1258 info->block_size = (PetscReal)a->bs; 1259 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1260 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1261 isend[3] = info->memory; isend[4] = info->mallocs; 1262 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1263 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1264 isend[3] += info->memory; isend[4] += info->mallocs; 1265 if (flag == MAT_LOCAL) { 1266 info->nz_used = isend[0]; 1267 info->nz_allocated = isend[1]; 1268 info->nz_unneeded = isend[2]; 1269 info->memory = isend[3]; 1270 info->mallocs = isend[4]; 1271 } else if (flag == MAT_GLOBAL_MAX) { 1272 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1273 info->nz_used = irecv[0]; 1274 info->nz_allocated = irecv[1]; 1275 info->nz_unneeded = irecv[2]; 1276 info->memory = irecv[3]; 1277 info->mallocs = irecv[4]; 1278 } else if (flag == MAT_GLOBAL_SUM) { 1279 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1280 info->nz_used = irecv[0]; 1281 info->nz_allocated = irecv[1]; 1282 info->nz_unneeded = irecv[2]; 1283 info->memory = irecv[3]; 1284 info->mallocs = irecv[4]; 1285 } else { 1286 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1287 } 1288 info->rows_global = (PetscReal)A->M; 1289 info->columns_global = (PetscReal)A->N; 1290 info->rows_local = (PetscReal)A->m; 1291 info->columns_local = (PetscReal)A->N; 1292 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1293 info->fill_ratio_needed = 0; 1294 info->factor_mallocs = 0; 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1300 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1301 { 1302 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1303 int ierr; 1304 1305 PetscFunctionBegin; 1306 switch (op) { 1307 case MAT_NO_NEW_NONZERO_LOCATIONS: 1308 case MAT_YES_NEW_NONZERO_LOCATIONS: 1309 case MAT_COLUMNS_UNSORTED: 1310 case MAT_COLUMNS_SORTED: 1311 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1312 case MAT_KEEP_ZEROED_ROWS: 1313 case MAT_NEW_NONZERO_LOCATION_ERR: 1314 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1315 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1316 break; 1317 case MAT_ROW_ORIENTED: 1318 a->roworiented = PETSC_TRUE; 1319 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1320 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1321 break; 1322 case MAT_ROWS_SORTED: 1323 case MAT_ROWS_UNSORTED: 1324 case MAT_YES_NEW_DIAGONALS: 1325 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1326 break; 1327 case MAT_COLUMN_ORIENTED: 1328 a->roworiented = PETSC_FALSE; 1329 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1330 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1331 break; 1332 case MAT_IGNORE_OFF_PROC_ENTRIES: 1333 a->donotstash = PETSC_TRUE; 1334 break; 1335 case MAT_NO_NEW_DIAGONALS: 1336 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1337 case MAT_USE_HASH_TABLE: 1338 a->ht_flag = PETSC_TRUE; 1339 break; 1340 case MAT_NOT_SYMMETRIC: 1341 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1342 case MAT_HERMITIAN: 1343 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1344 case MAT_SYMMETRIC: 1345 case MAT_STRUCTURALLY_SYMMETRIC: 1346 case MAT_NOT_HERMITIAN: 1347 case MAT_SYMMETRY_ETERNAL: 1348 case MAT_NOT_SYMMETRY_ETERNAL: 1349 break; 1350 default: 1351 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1358 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1359 { 1360 int ierr; 1361 PetscFunctionBegin; 1362 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1368 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1369 { 1370 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1371 Mat a = baij->A,b = baij->B; 1372 int ierr,s1,s2,s3; 1373 1374 PetscFunctionBegin; 1375 if (ll != rr) { 1376 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1377 } 1378 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1379 if (rr) { 1380 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1381 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1382 /* Overlap communication with computation. */ 1383 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1384 /*} if (ll) { */ 1385 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1386 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1387 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1388 /* } */ 1389 /* scale the diagonal block */ 1390 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1391 1392 /* if (rr) { */ 1393 /* Do a scatter end and then right scale the off-diagonal block */ 1394 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1395 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1396 } 1397 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1403 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1404 { 1405 PetscFunctionBegin; 1406 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1411 int MatPrintHelp_MPISBAIJ(Mat A) 1412 { 1413 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1414 MPI_Comm comm = A->comm; 1415 static int called = 0; 1416 int ierr; 1417 1418 PetscFunctionBegin; 1419 if (!a->rank) { 1420 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1421 } 1422 if (called) {PetscFunctionReturn(0);} else called = 1; 1423 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1424 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1430 int MatSetUnfactored_MPISBAIJ(Mat A) 1431 { 1432 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1433 int ierr; 1434 1435 PetscFunctionBegin; 1436 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatEqual_MPISBAIJ" 1444 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1445 { 1446 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1447 Mat a,b,c,d; 1448 PetscTruth flg; 1449 int ierr; 1450 1451 PetscFunctionBegin; 1452 a = matA->A; b = matA->B; 1453 c = matB->A; d = matB->B; 1454 1455 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1456 if (flg == PETSC_TRUE) { 1457 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1458 } 1459 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1465 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1466 { 1467 int ierr; 1468 1469 PetscFunctionBegin; 1470 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1476 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1477 { 1478 int i,ierr; 1479 PetscTruth flg; 1480 1481 for (i=0; i<n; i++) { 1482 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1483 if (!flg) { 1484 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1485 } 1486 } 1487 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1488 PetscFunctionReturn(0); 1489 } 1490 1491 1492 /* -------------------------------------------------------------------*/ 1493 static struct _MatOps MatOps_Values = { 1494 MatSetValues_MPISBAIJ, 1495 MatGetRow_MPISBAIJ, 1496 MatRestoreRow_MPISBAIJ, 1497 MatMult_MPISBAIJ, 1498 /* 4*/ MatMultAdd_MPISBAIJ, 1499 MatMultTranspose_MPISBAIJ, 1500 MatMultTransposeAdd_MPISBAIJ, 1501 0, 1502 0, 1503 0, 1504 /*10*/ 0, 1505 0, 1506 0, 1507 MatRelax_MPISBAIJ, 1508 MatTranspose_MPISBAIJ, 1509 /*15*/ MatGetInfo_MPISBAIJ, 1510 MatEqual_MPISBAIJ, 1511 MatGetDiagonal_MPISBAIJ, 1512 MatDiagonalScale_MPISBAIJ, 1513 MatNorm_MPISBAIJ, 1514 /*20*/ MatAssemblyBegin_MPISBAIJ, 1515 MatAssemblyEnd_MPISBAIJ, 1516 0, 1517 MatSetOption_MPISBAIJ, 1518 MatZeroEntries_MPISBAIJ, 1519 /*25*/ MatZeroRows_MPISBAIJ, 1520 0, 1521 0, 1522 0, 1523 0, 1524 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1525 0, 1526 0, 1527 0, 1528 0, 1529 /*35*/ MatDuplicate_MPISBAIJ, 1530 0, 1531 0, 1532 0, 1533 0, 1534 /*40*/ 0, 1535 MatGetSubMatrices_MPISBAIJ, 1536 0, 1537 MatGetValues_MPISBAIJ, 1538 0, 1539 /*45*/ MatPrintHelp_MPISBAIJ, 1540 MatScale_MPISBAIJ, 1541 0, 1542 0, 1543 0, 1544 /*50*/ MatGetBlockSize_MPISBAIJ, 1545 0, 1546 0, 1547 0, 1548 0, 1549 /*55*/ 0, 1550 0, 1551 MatSetUnfactored_MPISBAIJ, 1552 0, 1553 MatSetValuesBlocked_MPISBAIJ, 1554 /*60*/ 0, 1555 0, 1556 0, 1557 MatGetPetscMaps_Petsc, 1558 0, 1559 /*65*/ 0, 1560 0, 1561 0, 1562 0, 1563 0, 1564 /*70*/ MatGetRowMax_MPISBAIJ, 1565 0, 1566 0, 1567 0, 1568 0, 1569 /*75*/ 0, 1570 0, 1571 0, 1572 0, 1573 0, 1574 /*80*/ 0, 1575 0, 1576 0, 1577 0, 1578 /*85*/ MatLoad_MPISBAIJ 1579 }; 1580 1581 1582 EXTERN_C_BEGIN 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1585 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1586 { 1587 PetscFunctionBegin; 1588 *a = ((Mat_MPISBAIJ *)A->data)->A; 1589 *iscopy = PETSC_FALSE; 1590 PetscFunctionReturn(0); 1591 } 1592 EXTERN_C_END 1593 1594 EXTERN_C_BEGIN 1595 #undef __FUNCT__ 1596 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1597 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1598 { 1599 Mat_MPISBAIJ *b; 1600 int ierr,i,mbs,Mbs; 1601 1602 PetscFunctionBegin; 1603 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1604 1605 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1606 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1607 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1608 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1609 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1610 if (d_nnz) { 1611 for (i=0; i<B->m/bs; i++) { 1612 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1613 } 1614 } 1615 if (o_nnz) { 1616 for (i=0; i<B->m/bs; i++) { 1617 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1618 } 1619 } 1620 B->preallocated = PETSC_TRUE; 1621 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1622 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1623 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1624 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1625 1626 b = (Mat_MPISBAIJ*)B->data; 1627 mbs = B->m/bs; 1628 Mbs = B->M/bs; 1629 if (mbs*bs != B->m) { 1630 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1631 } 1632 1633 b->bs = bs; 1634 b->bs2 = bs*bs; 1635 b->mbs = mbs; 1636 b->nbs = mbs; 1637 b->Mbs = Mbs; 1638 b->Nbs = Mbs; 1639 1640 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1641 b->rowners[0] = 0; 1642 for (i=2; i<=b->size; i++) { 1643 b->rowners[i] += b->rowners[i-1]; 1644 } 1645 b->rstart = b->rowners[b->rank]; 1646 b->rend = b->rowners[b->rank+1]; 1647 b->cstart = b->rstart; 1648 b->cend = b->rend; 1649 for (i=0; i<=b->size; i++) { 1650 b->rowners_bs[i] = b->rowners[i]*bs; 1651 } 1652 b->rstart_bs = b-> rstart*bs; 1653 b->rend_bs = b->rend*bs; 1654 1655 b->cstart_bs = b->cstart*bs; 1656 b->cend_bs = b->cend*bs; 1657 1658 1659 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1660 PetscLogObjectParent(B,b->A); 1661 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1662 PetscLogObjectParent(B,b->B); 1663 1664 /* build cache for off array entries formed */ 1665 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1666 1667 PetscFunctionReturn(0); 1668 } 1669 EXTERN_C_END 1670 1671 /*MC 1672 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1673 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1674 1675 Options Database Keys: 1676 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1677 1678 Level: beginner 1679 1680 .seealso: MatCreateMPISBAIJ 1681 M*/ 1682 1683 EXTERN_C_BEGIN 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatCreate_MPISBAIJ" 1686 int MatCreate_MPISBAIJ(Mat B) 1687 { 1688 Mat_MPISBAIJ *b; 1689 int ierr; 1690 PetscTruth flg; 1691 1692 PetscFunctionBegin; 1693 1694 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1695 B->data = (void*)b; 1696 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1697 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1698 1699 B->ops->destroy = MatDestroy_MPISBAIJ; 1700 B->ops->view = MatView_MPISBAIJ; 1701 B->mapping = 0; 1702 B->factor = 0; 1703 B->assembled = PETSC_FALSE; 1704 1705 B->insertmode = NOT_SET_VALUES; 1706 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1707 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1708 1709 /* build local table of row and column ownerships */ 1710 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1711 b->cowners = b->rowners + b->size + 2; 1712 b->rowners_bs = b->cowners + b->size + 2; 1713 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1714 1715 /* build cache for off array entries formed */ 1716 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1717 b->donotstash = PETSC_FALSE; 1718 b->colmap = PETSC_NULL; 1719 b->garray = PETSC_NULL; 1720 b->roworiented = PETSC_TRUE; 1721 1722 #if defined(PETSC_USE_MAT_SINGLE) 1723 /* stuff for MatSetValues_XXX in single precision */ 1724 b->setvalueslen = 0; 1725 b->setvaluescopy = PETSC_NULL; 1726 #endif 1727 1728 /* stuff used in block assembly */ 1729 b->barray = 0; 1730 1731 /* stuff used for matrix vector multiply */ 1732 b->lvec = 0; 1733 b->Mvctx = 0; 1734 b->slvec0 = 0; 1735 b->slvec0b = 0; 1736 b->slvec1 = 0; 1737 b->slvec1a = 0; 1738 b->slvec1b = 0; 1739 b->sMvctx = 0; 1740 1741 /* stuff for MatGetRow() */ 1742 b->rowindices = 0; 1743 b->rowvalues = 0; 1744 b->getrowactive = PETSC_FALSE; 1745 1746 /* hash table stuff */ 1747 b->ht = 0; 1748 b->hd = 0; 1749 b->ht_size = 0; 1750 b->ht_flag = PETSC_FALSE; 1751 b->ht_fact = 0; 1752 b->ht_total_ct = 0; 1753 b->ht_insert_ct = 0; 1754 1755 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1756 if (flg) { 1757 PetscReal fact = 1.39; 1758 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1759 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1760 if (fact <= 1.0) fact = 1.39; 1761 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1762 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1763 } 1764 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1765 "MatStoreValues_MPISBAIJ", 1766 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1767 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1768 "MatRetrieveValues_MPISBAIJ", 1769 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1770 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1771 "MatGetDiagonalBlock_MPISBAIJ", 1772 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1773 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1774 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1775 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 EXTERN_C_END 1779 1780 /*MC 1781 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1782 1783 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1784 and MATMPISBAIJ otherwise. 1785 1786 Options Database Keys: 1787 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1788 1789 Level: beginner 1790 1791 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1792 M*/ 1793 1794 EXTERN_C_BEGIN 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatCreate_SBAIJ" 1797 int MatCreate_SBAIJ(Mat A) { 1798 int ierr,size; 1799 1800 PetscFunctionBegin; 1801 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1802 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1803 if (size == 1) { 1804 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1805 } else { 1806 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1807 } 1808 PetscFunctionReturn(0); 1809 } 1810 EXTERN_C_END 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1814 /*@C 1815 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1816 the user should preallocate the matrix storage by setting the parameters 1817 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1818 performance can be increased by more than a factor of 50. 1819 1820 Collective on Mat 1821 1822 Input Parameters: 1823 + A - the matrix 1824 . bs - size of blockk 1825 . d_nz - number of block nonzeros per block row in diagonal portion of local 1826 submatrix (same for all local rows) 1827 . d_nnz - array containing the number of block nonzeros in the various block rows 1828 in the upper triangular and diagonal part of the in diagonal portion of the local 1829 (possibly different for each block row) or PETSC_NULL. You must leave room 1830 for the diagonal entry even if it is zero. 1831 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1832 submatrix (same for all local rows). 1833 - o_nnz - array containing the number of nonzeros in the various block rows of the 1834 off-diagonal portion of the local submatrix (possibly different for 1835 each block row) or PETSC_NULL. 1836 1837 1838 Options Database Keys: 1839 . -mat_no_unroll - uses code that does not unroll the loops in the 1840 block calculations (much slower) 1841 . -mat_block_size - size of the blocks to use 1842 1843 Notes: 1844 1845 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1846 than it must be used on all processors that share the object for that argument. 1847 1848 Storage Information: 1849 For a square global matrix we define each processor's diagonal portion 1850 to be its local rows and the corresponding columns (a square submatrix); 1851 each processor's off-diagonal portion encompasses the remainder of the 1852 local matrix (a rectangular submatrix). 1853 1854 The user can specify preallocated storage for the diagonal part of 1855 the local submatrix with either d_nz or d_nnz (not both). Set 1856 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1857 memory allocation. Likewise, specify preallocated storage for the 1858 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1859 1860 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1861 the figure below we depict these three local rows and all columns (0-11). 1862 1863 .vb 1864 0 1 2 3 4 5 6 7 8 9 10 11 1865 ------------------- 1866 row 3 | o o o d d d o o o o o o 1867 row 4 | o o o d d d o o o o o o 1868 row 5 | o o o d d d o o o o o o 1869 ------------------- 1870 .ve 1871 1872 Thus, any entries in the d locations are stored in the d (diagonal) 1873 submatrix, and any entries in the o locations are stored in the 1874 o (off-diagonal) submatrix. Note that the d matrix is stored in 1875 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1876 1877 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1878 plus the diagonal part of the d matrix, 1879 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1880 In general, for PDE problems in which most nonzeros are near the diagonal, 1881 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1882 or you will get TERRIBLE performance; see the users' manual chapter on 1883 matrices. 1884 1885 Level: intermediate 1886 1887 .keywords: matrix, block, aij, compressed row, sparse, parallel 1888 1889 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1890 @*/ 1891 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1892 { 1893 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1894 1895 PetscFunctionBegin; 1896 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1897 if (f) { 1898 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatCreateMPISBAIJ" 1905 /*@C 1906 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1907 (block compressed row). For good matrix assembly performance 1908 the user should preallocate the matrix storage by setting the parameters 1909 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1910 performance can be increased by more than a factor of 50. 1911 1912 Collective on MPI_Comm 1913 1914 Input Parameters: 1915 + comm - MPI communicator 1916 . bs - size of blockk 1917 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1918 This value should be the same as the local size used in creating the 1919 y vector for the matrix-vector product y = Ax. 1920 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1921 This value should be the same as the local size used in creating the 1922 x vector for the matrix-vector product y = Ax. 1923 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1924 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1925 . d_nz - number of block nonzeros per block row in diagonal portion of local 1926 submatrix (same for all local rows) 1927 . d_nnz - array containing the number of block nonzeros in the various block rows 1928 in the upper triangular portion of the in diagonal portion of the local 1929 (possibly different for each block block row) or PETSC_NULL. 1930 You must leave room for the diagonal entry even if it is zero. 1931 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1932 submatrix (same for all local rows). 1933 - o_nnz - array containing the number of nonzeros in the various block rows of the 1934 off-diagonal portion of the local submatrix (possibly different for 1935 each block row) or PETSC_NULL. 1936 1937 Output Parameter: 1938 . A - the matrix 1939 1940 Options Database Keys: 1941 . -mat_no_unroll - uses code that does not unroll the loops in the 1942 block calculations (much slower) 1943 . -mat_block_size - size of the blocks to use 1944 . -mat_mpi - use the parallel matrix data structures even on one processor 1945 (defaults to using SeqBAIJ format on one processor) 1946 1947 Notes: 1948 The user MUST specify either the local or global matrix dimensions 1949 (possibly both). 1950 1951 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1952 than it must be used on all processors that share the object for that argument. 1953 1954 Storage Information: 1955 For a square global matrix we define each processor's diagonal portion 1956 to be its local rows and the corresponding columns (a square submatrix); 1957 each processor's off-diagonal portion encompasses the remainder of the 1958 local matrix (a rectangular submatrix). 1959 1960 The user can specify preallocated storage for the diagonal part of 1961 the local submatrix with either d_nz or d_nnz (not both). Set 1962 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1963 memory allocation. Likewise, specify preallocated storage for the 1964 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1965 1966 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1967 the figure below we depict these three local rows and all columns (0-11). 1968 1969 .vb 1970 0 1 2 3 4 5 6 7 8 9 10 11 1971 ------------------- 1972 row 3 | o o o d d d o o o o o o 1973 row 4 | o o o d d d o o o o o o 1974 row 5 | o o o d d d o o o o o o 1975 ------------------- 1976 .ve 1977 1978 Thus, any entries in the d locations are stored in the d (diagonal) 1979 submatrix, and any entries in the o locations are stored in the 1980 o (off-diagonal) submatrix. Note that the d matrix is stored in 1981 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1982 1983 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1984 plus the diagonal part of the d matrix, 1985 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1986 In general, for PDE problems in which most nonzeros are near the diagonal, 1987 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1988 or you will get TERRIBLE performance; see the users' manual chapter on 1989 matrices. 1990 1991 Level: intermediate 1992 1993 .keywords: matrix, block, aij, compressed row, sparse, parallel 1994 1995 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1996 @*/ 1997 1998 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 1999 { 2000 int ierr,size; 2001 2002 PetscFunctionBegin; 2003 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2004 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2005 if (size > 1) { 2006 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2007 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2008 } else { 2009 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2010 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2011 } 2012 PetscFunctionReturn(0); 2013 } 2014 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2018 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2019 { 2020 Mat mat; 2021 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2022 int ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs; 2023 PetscScalar *array; 2024 2025 PetscFunctionBegin; 2026 *newmat = 0; 2027 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2028 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2029 /* ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); -- cause error? */ 2030 mat->factor = matin->factor; 2031 mat->preallocated = PETSC_TRUE; 2032 mat->assembled = PETSC_TRUE; 2033 a = (Mat_MPISBAIJ*)mat->data; 2034 a->bs = oldmat->bs; 2035 a->bs2 = oldmat->bs2; 2036 a->mbs = oldmat->mbs; 2037 a->nbs = oldmat->nbs; 2038 a->Mbs = oldmat->Mbs; 2039 a->Nbs = oldmat->Nbs; 2040 2041 a->rstart = oldmat->rstart; 2042 a->rend = oldmat->rend; 2043 a->cstart = oldmat->cstart; 2044 a->cend = oldmat->cend; 2045 a->size = oldmat->size; 2046 a->rank = oldmat->rank; 2047 a->donotstash = oldmat->donotstash; 2048 a->roworiented = oldmat->roworiented; 2049 a->rowindices = 0; 2050 a->rowvalues = 0; 2051 a->getrowactive = PETSC_FALSE; 2052 a->barray = 0; 2053 a->rstart_bs = oldmat->rstart_bs; 2054 a->rend_bs = oldmat->rend_bs; 2055 a->cstart_bs = oldmat->cstart_bs; 2056 a->cend_bs = oldmat->cend_bs; 2057 2058 /* hash table stuff */ 2059 a->ht = 0; 2060 a->hd = 0; 2061 a->ht_size = 0; 2062 a->ht_flag = oldmat->ht_flag; 2063 a->ht_fact = oldmat->ht_fact; 2064 a->ht_total_ct = 0; 2065 a->ht_insert_ct = 0; 2066 2067 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2068 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2069 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2070 if (oldmat->colmap) { 2071 #if defined (PETSC_USE_CTABLE) 2072 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2073 #else 2074 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2075 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2076 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2077 #endif 2078 } else a->colmap = 0; 2079 2080 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2081 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2082 PetscLogObjectMemory(mat,len*sizeof(int)); 2083 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2084 } else a->garray = 0; 2085 2086 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2087 PetscLogObjectParent(mat,a->lvec); 2088 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2089 PetscLogObjectParent(mat,a->Mvctx); 2090 2091 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2092 PetscLogObjectParent(mat,a->slvec0); 2093 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2094 PetscLogObjectParent(mat,a->slvec1); 2095 2096 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2097 ierr = VecGetArrayFast(a->slvec1,&array);CHKERRQ(ierr); 2098 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2099 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2100 ierr = VecRestoreArrayFast(a->slvec1,&array);CHKERRQ(ierr); 2101 ierr = VecGetArrayFast(a->slvec0,&array);CHKERRQ(ierr); 2102 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2103 ierr = VecRestoreArrayFast(a->slvec0,&array);CHKERRQ(ierr); 2104 PetscLogObjectParent(mat,a->slvec0); 2105 PetscLogObjectParent(mat,a->slvec1); 2106 PetscLogObjectParent(mat,a->slvec0b); 2107 PetscLogObjectParent(mat,a->slvec1a); 2108 PetscLogObjectParent(mat,a->slvec1b); 2109 2110 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2111 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2112 a->sMvctx = oldmat->sMvctx; 2113 PetscLogObjectParent(mat,a->sMvctx); 2114 2115 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2116 PetscLogObjectParent(mat,a->A); 2117 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2118 PetscLogObjectParent(mat,a->B); 2119 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2120 *newmat = mat; 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #include "petscsys.h" 2125 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "MatLoad_MPISBAIJ" 2128 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2129 { 2130 Mat A; 2131 int i,nz,ierr,j,rstart,rend,fd; 2132 PetscScalar *vals,*buf; 2133 MPI_Comm comm = ((PetscObject)viewer)->comm; 2134 MPI_Status status; 2135 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2136 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2137 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2138 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2139 int dcount,kmax,k,nzcount,tmp; 2140 2141 PetscFunctionBegin; 2142 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2143 2144 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2145 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2146 if (!rank) { 2147 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2148 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2149 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2150 if (header[3] < 0) { 2151 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2152 } 2153 } 2154 2155 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2156 M = header[1]; N = header[2]; 2157 2158 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2159 2160 /* 2161 This code adds extra rows to make sure the number of rows is 2162 divisible by the blocksize 2163 */ 2164 Mbs = M/bs; 2165 extra_rows = bs - M + bs*(Mbs); 2166 if (extra_rows == bs) extra_rows = 0; 2167 else Mbs++; 2168 if (extra_rows &&!rank) { 2169 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2170 } 2171 2172 /* determine ownership of all rows */ 2173 mbs = Mbs/size + ((Mbs % size) > rank); 2174 m = mbs*bs; 2175 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2176 browners = rowners + size + 1; 2177 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2178 rowners[0] = 0; 2179 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2180 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2181 rstart = rowners[rank]; 2182 rend = rowners[rank+1]; 2183 2184 /* distribute row lengths to all processors */ 2185 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2186 if (!rank) { 2187 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2188 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2189 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2190 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2191 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2192 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2193 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2194 } else { 2195 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2196 } 2197 2198 if (!rank) { /* procs[0] */ 2199 /* calculate the number of nonzeros on each processor */ 2200 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2201 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2202 for (i=0; i<size; i++) { 2203 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2204 procsnz[i] += rowlengths[j]; 2205 } 2206 } 2207 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2208 2209 /* determine max buffer needed and allocate it */ 2210 maxnz = 0; 2211 for (i=0; i<size; i++) { 2212 maxnz = PetscMax(maxnz,procsnz[i]); 2213 } 2214 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2215 2216 /* read in my part of the matrix column indices */ 2217 nz = procsnz[0]; 2218 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2219 mycols = ibuf; 2220 if (size == 1) nz -= extra_rows; 2221 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2222 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2223 2224 /* read in every ones (except the last) and ship off */ 2225 for (i=1; i<size-1; i++) { 2226 nz = procsnz[i]; 2227 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2228 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2229 } 2230 /* read in the stuff for the last proc */ 2231 if (size != 1) { 2232 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2233 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2234 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2235 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2236 } 2237 ierr = PetscFree(cols);CHKERRQ(ierr); 2238 } else { /* procs[i], i>0 */ 2239 /* determine buffer space needed for message */ 2240 nz = 0; 2241 for (i=0; i<m; i++) { 2242 nz += locrowlens[i]; 2243 } 2244 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2245 mycols = ibuf; 2246 /* receive message of column indices*/ 2247 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2248 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2249 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2250 } 2251 2252 /* loop over local rows, determining number of off diagonal entries */ 2253 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2254 odlens = dlens + (rend-rstart); 2255 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2256 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2257 masked1 = mask + Mbs; 2258 masked2 = masked1 + Mbs; 2259 rowcount = 0; nzcount = 0; 2260 for (i=0; i<mbs; i++) { 2261 dcount = 0; 2262 odcount = 0; 2263 for (j=0; j<bs; j++) { 2264 kmax = locrowlens[rowcount]; 2265 for (k=0; k<kmax; k++) { 2266 tmp = mycols[nzcount++]/bs; /* block col. index */ 2267 if (!mask[tmp]) { 2268 mask[tmp] = 1; 2269 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2270 else masked1[dcount++] = tmp; /* entry in diag portion */ 2271 } 2272 } 2273 rowcount++; 2274 } 2275 2276 dlens[i] = dcount; /* d_nzz[i] */ 2277 odlens[i] = odcount; /* o_nzz[i] */ 2278 2279 /* zero out the mask elements we set */ 2280 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2281 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2282 } 2283 2284 /* create our matrix */ 2285 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2286 ierr = MatSetType(A,type);CHKERRQ(ierr); 2287 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2288 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2289 2290 if (!rank) { 2291 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2292 /* read in my part of the matrix numerical values */ 2293 nz = procsnz[0]; 2294 vals = buf; 2295 mycols = ibuf; 2296 if (size == 1) nz -= extra_rows; 2297 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2298 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2299 2300 /* insert into matrix */ 2301 jj = rstart*bs; 2302 for (i=0; i<m; i++) { 2303 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2304 mycols += locrowlens[i]; 2305 vals += locrowlens[i]; 2306 jj++; 2307 } 2308 2309 /* read in other processors (except the last one) and ship out */ 2310 for (i=1; i<size-1; i++) { 2311 nz = procsnz[i]; 2312 vals = buf; 2313 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2314 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2315 } 2316 /* the last proc */ 2317 if (size != 1){ 2318 nz = procsnz[i] - extra_rows; 2319 vals = buf; 2320 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2321 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2322 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2323 } 2324 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2325 2326 } else { 2327 /* receive numeric values */ 2328 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2329 2330 /* receive message of values*/ 2331 vals = buf; 2332 mycols = ibuf; 2333 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2334 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2335 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2336 2337 /* insert into matrix */ 2338 jj = rstart*bs; 2339 for (i=0; i<m; i++) { 2340 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2341 mycols += locrowlens[i]; 2342 vals += locrowlens[i]; 2343 jj++; 2344 } 2345 } 2346 2347 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2348 ierr = PetscFree(buf);CHKERRQ(ierr); 2349 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2350 ierr = PetscFree(rowners);CHKERRQ(ierr); 2351 ierr = PetscFree(dlens);CHKERRQ(ierr); 2352 ierr = PetscFree(mask);CHKERRQ(ierr); 2353 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2354 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2355 *newmat = A; 2356 PetscFunctionReturn(0); 2357 } 2358 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2361 /*@ 2362 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2363 2364 Input Parameters: 2365 . mat - the matrix 2366 . fact - factor 2367 2368 Collective on Mat 2369 2370 Level: advanced 2371 2372 Notes: 2373 This can also be set by the command line option: -mat_use_hash_table fact 2374 2375 .keywords: matrix, hashtable, factor, HT 2376 2377 .seealso: MatSetOption() 2378 @*/ 2379 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2380 { 2381 PetscFunctionBegin; 2382 SETERRQ(1,"Function not yet written for SBAIJ format"); 2383 /* PetscFunctionReturn(0); */ 2384 } 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2388 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2389 { 2390 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2391 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2392 PetscReal atmp; 2393 PetscReal *work,*svalues,*rvalues; 2394 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2395 int rank,size,*rowners_bs,dest,count,source; 2396 PetscScalar *va; 2397 MatScalar *ba; 2398 MPI_Status stat; 2399 2400 PetscFunctionBegin; 2401 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2402 ierr = VecGetArrayFast(v,&va);CHKERRQ(ierr); 2403 2404 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2405 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2406 2407 bs = a->bs; 2408 mbs = a->mbs; 2409 Mbs = a->Mbs; 2410 ba = b->a; 2411 bi = b->i; 2412 bj = b->j; 2413 /* 2414 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2415 PetscSynchronizedFlush(A->comm); 2416 */ 2417 2418 /* find ownerships */ 2419 rowners_bs = a->rowners_bs; 2420 /* 2421 if (!rank){ 2422 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2423 } 2424 */ 2425 2426 /* each proc creates an array to be distributed */ 2427 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2428 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2429 2430 /* row_max for B */ 2431 if (rank != size-1){ 2432 for (i=0; i<mbs; i++) { 2433 ncols = bi[1] - bi[0]; bi++; 2434 brow = bs*i; 2435 for (j=0; j<ncols; j++){ 2436 bcol = bs*(*bj); 2437 for (kcol=0; kcol<bs; kcol++){ 2438 col = bcol + kcol; /* local col index */ 2439 col += rowners_bs[rank+1]; /* global col index */ 2440 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2441 for (krow=0; krow<bs; krow++){ 2442 atmp = PetscAbsScalar(*ba); ba++; 2443 row = brow + krow; /* local row index */ 2444 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2445 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2446 if (work[col] < atmp) work[col] = atmp; 2447 } 2448 } 2449 bj++; 2450 } 2451 } 2452 /* 2453 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2454 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2455 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2456 */ 2457 2458 /* send values to its owners */ 2459 for (dest=rank+1; dest<size; dest++){ 2460 svalues = work + rowners_bs[dest]; 2461 count = rowners_bs[dest+1]-rowners_bs[dest]; 2462 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2463 /* 2464 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2465 PetscSynchronizedFlush(A->comm); 2466 */ 2467 } 2468 } 2469 2470 /* receive values */ 2471 if (rank){ 2472 rvalues = work; 2473 count = rowners_bs[rank+1]-rowners_bs[rank]; 2474 for (source=0; source<rank; source++){ 2475 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2476 /* process values */ 2477 for (i=0; i<count; i++){ 2478 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2479 } 2480 /* 2481 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2482 PetscSynchronizedFlush(A->comm); 2483 */ 2484 } 2485 } 2486 2487 ierr = VecRestoreArrayFast(v,&va);CHKERRQ(ierr); 2488 ierr = PetscFree(work);CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatRelax_MPISBAIJ" 2494 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2495 { 2496 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2497 int ierr,mbs=mat->mbs,bs=mat->bs; 2498 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2499 Vec bb1; 2500 2501 PetscFunctionBegin; 2502 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2503 if (bs > 1) 2504 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2505 2506 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2507 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2508 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2509 its--; 2510 } 2511 2512 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2513 while (its--){ 2514 2515 /* lower triangular part: slvec0b = - B^T*xx */ 2516 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2517 2518 /* copy xx into slvec0a */ 2519 ierr = VecGetArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr); 2520 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 2521 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2522 ierr = VecRestoreArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr); 2523 2524 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2525 2526 /* copy bb into slvec1a */ 2527 ierr = VecGetArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr); 2528 ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr); 2529 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2530 ierr = VecRestoreArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr); 2531 2532 /* set slvec1b = 0 */ 2533 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2534 2535 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2536 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 2537 ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr); 2538 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2539 2540 /* upper triangular part: bb1 = bb1 - B*x */ 2541 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2542 2543 /* local diagonal sweep */ 2544 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2545 } 2546 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2547 } else { 2548 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2549 } 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2555 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2556 { 2557 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2558 int ierr; 2559 PetscScalar mone=-1.0; 2560 Vec lvec1,bb1; 2561 2562 PetscFunctionBegin; 2563 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2564 if (mat->bs > 1) 2565 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2566 2567 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2568 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2569 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2570 its--; 2571 } 2572 2573 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2574 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2575 while (its--){ 2576 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2577 2578 /* lower diagonal part: bb1 = bb - B^T*xx */ 2579 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2580 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2581 2582 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2583 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2584 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2585 2586 /* upper diagonal part: bb1 = bb1 - B*x */ 2587 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2589 2590 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2591 2592 /* diagonal sweep */ 2593 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2594 } 2595 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2596 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2597 } else { 2598 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2599 } 2600 PetscFunctionReturn(0); 2601 } 2602 2603