1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 112 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 113 new_j = (int*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 #ifndef MatSetValues_SeqBAIJ_B_Private 155 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 188 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 189 new_j = (int*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 198 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 #endif 231 232 #if defined(PETSC_USE_MAT_SINGLE) 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValues_MPISBAIJ" 235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 238 int ierr,i,N = m*n; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 int ierr,i,N = m*n*b->bs2; 262 MatScalar *vsingle; 263 264 PetscFunctionBegin; 265 if (N > b->setvalueslen) { 266 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 267 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 268 b->setvalueslen = N; 269 } 270 vsingle = b->setvaluescopy; 271 for (i=0; i<N; i++) { 272 vsingle[i] = v[i]; 273 } 274 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 281 { 282 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 283 int ierr,i,N = m*n; 284 MatScalar *vsingle; 285 286 PetscFunctionBegin; 287 SETERRQ(1,"Function not yet written for SBAIJ format"); 288 /* PetscFunctionReturn(0); */ 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 294 { 295 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 296 int ierr,i,N = m*n*b->bs2; 297 MatScalar *vsingle; 298 299 PetscFunctionBegin; 300 SETERRQ(1,"Function not yet written for SBAIJ format"); 301 /* PetscFunctionReturn(0); */ 302 } 303 #endif 304 305 /* Only add/insert a(i,j) with i<=j (blocks). 306 Any a(i,j) with i>j input by user is ingored. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 311 { 312 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 313 MatScalar value; 314 PetscTruth roworiented = baij->roworiented; 315 int ierr,i,j,row,col; 316 int rstart_orig=baij->rstart_bs; 317 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 318 int cend_orig=baij->cend_bs,bs=baij->bs; 319 320 /* Some Variables required in the macro */ 321 Mat A = baij->A; 322 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 323 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 324 MatScalar *aa=a->a; 325 326 Mat B = baij->B; 327 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 328 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 329 MatScalar *ba=b->a; 330 331 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 332 int low,high,t,ridx,cidx,bs2=a->bs2; 333 MatScalar *ap,*bap; 334 335 /* for stash */ 336 int n_loc, *in_loc=0; 337 MatScalar *v_loc=0; 338 339 PetscFunctionBegin; 340 341 if(!baij->donotstash){ 342 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 343 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 344 } 345 346 for (i=0; i<m; i++) { 347 if (im[i] < 0) continue; 348 #if defined(PETSC_USE_BOPT_g) 349 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 350 #endif 351 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 352 row = im[i] - rstart_orig; /* local row index */ 353 for (j=0; j<n; j++) { 354 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 355 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 356 col = in[j] - cstart_orig; /* local col index */ 357 brow = row/bs; bcol = col/bs; 358 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 359 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 360 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 361 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 362 } else if (in[j] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 365 #endif 366 else { /* off-diag entry (B) */ 367 if (mat->was_assembled) { 368 if (!baij->colmap) { 369 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 370 } 371 #if defined (PETSC_USE_CTABLE) 372 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 373 col = col - 1; 374 #else 375 col = baij->colmap[in[j]/bs] - 1; 376 #endif 377 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 378 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 379 col = in[j]; 380 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 381 B = baij->B; 382 b = (Mat_SeqBAIJ*)(B)->data; 383 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 384 ba=b->a; 385 } else col += in[j]%bs; 386 } else col = in[j]; 387 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 388 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 389 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 390 } 391 } 392 } else { /* off processor entry */ 393 if (!baij->donotstash) { 394 n_loc = 0; 395 for (j=0; j<n; j++){ 396 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 397 in_loc[n_loc] = in[j]; 398 if (roworiented) { 399 v_loc[n_loc] = v[i*n+j]; 400 } else { 401 v_loc[n_loc] = v[j*m+i]; 402 } 403 n_loc++; 404 } 405 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 406 } 407 } 408 } 409 410 if(!baij->donotstash){ 411 ierr = PetscFree(in_loc);CHKERRQ(ierr); 412 ierr = PetscFree(v_loc);CHKERRQ(ierr); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 420 { 421 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 422 const MatScalar *value; 423 MatScalar *barray=baij->barray; 424 PetscTruth roworiented = baij->roworiented; 425 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 426 int rend=baij->rend,cstart=baij->cstart,stepval; 427 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 428 429 PetscFunctionBegin; 430 if(!barray) { 431 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 432 baij->barray = barray; 433 } 434 435 if (roworiented) { 436 stepval = (n-1)*bs; 437 } else { 438 stepval = (m-1)*bs; 439 } 440 for (i=0; i<m; i++) { 441 if (im[i] < 0) continue; 442 #if defined(PETSC_USE_BOPT_g) 443 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 444 #endif 445 if (im[i] >= rstart && im[i] < rend) { 446 row = im[i] - rstart; 447 for (j=0; j<n; j++) { 448 /* If NumCol = 1 then a copy is not required */ 449 if ((roworiented) && (n == 1)) { 450 barray = (MatScalar*) v + i*bs2; 451 } else if((!roworiented) && (m == 1)) { 452 barray = (MatScalar*) v + j*bs2; 453 } else { /* Here a copy is required */ 454 if (roworiented) { 455 value = v + i*(stepval+bs)*bs + j*bs; 456 } else { 457 value = v + j*(stepval+bs)*bs + i*bs; 458 } 459 for (ii=0; ii<bs; ii++,value+=stepval) { 460 for (jj=0; jj<bs; jj++) { 461 *barray++ = *value++; 462 } 463 } 464 barray -=bs2; 465 } 466 467 if (in[j] >= cstart && in[j] < cend){ 468 col = in[j] - cstart; 469 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 470 } 471 else if (in[j] < 0) continue; 472 #if defined(PETSC_USE_BOPT_g) 473 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 474 #endif 475 else { 476 if (mat->was_assembled) { 477 if (!baij->colmap) { 478 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 479 } 480 481 #if defined(PETSC_USE_BOPT_g) 482 #if defined (PETSC_USE_CTABLE) 483 { int data; 484 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 485 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 486 } 487 #else 488 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 489 #endif 490 #endif 491 #if defined (PETSC_USE_CTABLE) 492 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 493 col = (col - 1)/bs; 494 #else 495 col = (baij->colmap[in[j]] - 1)/bs; 496 #endif 497 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 498 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 499 col = in[j]; 500 } 501 } 502 else col = in[j]; 503 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 504 } 505 } 506 } else { 507 if (!baij->donotstash) { 508 if (roworiented) { 509 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 510 } else { 511 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 512 } 513 } 514 } 515 } 516 PetscFunctionReturn(0); 517 } 518 519 #define HASH_KEY 0.6180339887 520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 526 { 527 PetscFunctionBegin; 528 SETERRQ(1,"Function not yet written for SBAIJ format"); 529 /* PetscFunctionReturn(0); */ 530 } 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 535 { 536 PetscFunctionBegin; 537 SETERRQ(1,"Function not yet written for SBAIJ format"); 538 /* PetscFunctionReturn(0); */ 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatGetValues_MPISBAIJ" 543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 544 { 545 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 546 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 547 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 552 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 553 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 554 row = idxm[i] - bsrstart; 555 for (j=0; j<n; j++) { 556 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 557 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 558 if (idxn[j] >= bscstart && idxn[j] < bscend){ 559 col = idxn[j] - bscstart; 560 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 561 } else { 562 if (!baij->colmap) { 563 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 564 } 565 #if defined (PETSC_USE_CTABLE) 566 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 567 data --; 568 #else 569 data = baij->colmap[idxn[j]/bs]-1; 570 #endif 571 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 572 else { 573 col = data + idxn[j]%bs; 574 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575 } 576 } 577 } 578 } else { 579 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatNorm_MPISBAIJ" 587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 588 { 589 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 590 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 591 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 592 int ierr; 593 PetscReal sum[2],*lnorm2; 594 595 PetscFunctionBegin; 596 if (baij->size == 1) { 597 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 598 } else { 599 if (type == NORM_FROBENIUS) { 600 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 601 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 602 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 603 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 605 /* 606 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 607 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 608 */ 609 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 610 /* 611 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 612 PetscSynchronizedFlush(mat->comm); */ 613 614 *norm = sqrt(sum[0] + 2*sum[1]); 615 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 /* 624 Creates the hash table, and sets the table 625 This table is created only once. 626 If new entried need to be added to the matrix 627 then the hash table has to be destroyed and 628 recreated. 629 */ 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 633 { 634 PetscFunctionBegin; 635 SETERRQ(1,"Function not yet written for SBAIJ format"); 636 /* PetscFunctionReturn(0); */ 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 642 { 643 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 644 int ierr,nstash,reallocs; 645 InsertMode addv; 646 647 PetscFunctionBegin; 648 if (baij->donotstash) { 649 PetscFunctionReturn(0); 650 } 651 652 /* make sure all processors are either in INSERTMODE or ADDMODE */ 653 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 654 if (addv == (ADD_VALUES|INSERT_VALUES)) { 655 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 656 } 657 mat->insertmode = addv; /* in case this processor had no cache */ 658 659 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 673 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 674 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 675 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 676 int *row,*col,other_disassembled; 677 PetscTruth r1,r2,r3; 678 MatScalar *val; 679 InsertMode addv = mat->insertmode; 680 681 PetscFunctionBegin; 682 683 if (!baij->donotstash) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 /* 687 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 688 PetscSynchronizedFlush(mat->comm); 689 */ 690 if (!flg) break; 691 692 for (i=0; i<n;) { 693 /* Now identify the consecutive vals belonging to the same row */ 694 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 695 if (j < n) ncols = j-i; 696 else ncols = n-i; 697 /* Now assemble all these values with a single function call */ 698 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 699 i = j; 700 } 701 } 702 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 703 /* Now process the block-stash. Since the values are stashed column-oriented, 704 set the roworiented flag to column oriented, and after MatSetValues() 705 restore the original flags */ 706 r1 = baij->roworiented; 707 r2 = a->roworiented; 708 r3 = b->roworiented; 709 baij->roworiented = PETSC_FALSE; 710 a->roworiented = PETSC_FALSE; 711 b->roworiented = PETSC_FALSE; 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n;) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 719 if (j < n) ncols = j-i; 720 else ncols = n-i; 721 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 722 i = j; 723 } 724 } 725 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 726 baij->roworiented = r1; 727 a->roworiented = r2; 728 b->roworiented = r3; 729 } 730 731 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 741 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 747 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 748 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 749 } 750 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 752 753 #if defined(PETSC_USE_BOPT_g) 754 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 755 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 756 baij->ht_total_ct = 0; 757 baij->ht_insert_ct = 0; 758 } 759 #endif 760 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 761 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 762 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 763 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 764 } 765 766 if (baij->rowvalues) { 767 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 768 baij->rowvalues = 0; 769 } 770 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 779 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 780 PetscTruth isascii,isdraw; 781 PetscViewer sviewer; 782 PetscViewerFormat format; 783 784 PetscFunctionBegin; 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 786 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 787 if (isascii) { 788 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 789 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 790 MatInfo info; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 794 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 795 baij->bs,(int)info.memory);CHKERRQ(ierr); 796 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 797 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 798 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 799 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 800 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 801 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } else if (format == PETSC_VIEWER_ASCII_INFO) { 804 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 } 808 809 if (isdraw) { 810 PetscDraw draw; 811 PetscTruth isnull; 812 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 813 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 814 } 815 816 if (size == 1) { 817 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 818 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 819 } else { 820 /* assemble the entire matrix onto first processor. */ 821 Mat A; 822 Mat_SeqSBAIJ *Aloc; 823 Mat_SeqBAIJ *Bloc; 824 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 825 MatScalar *a; 826 827 if (!rank) { 828 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 829 } else { 830 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 831 } 832 PetscLogObjectParent(mat,A); 833 834 /* copy over the A part */ 835 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 836 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 837 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 838 839 for (i=0; i<mbs; i++) { 840 rvals[0] = bs*(baij->rstart + i); 841 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 842 for (j=ai[i]; j<ai[i+1]; j++) { 843 col = (baij->cstart+aj[j])*bs; 844 for (k=0; k<bs; k++) { 845 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 846 col++; a += bs; 847 } 848 } 849 } 850 /* copy over the B part */ 851 Bloc = (Mat_SeqBAIJ*)baij->B->data; 852 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 853 for (i=0; i<mbs; i++) { 854 rvals[0] = bs*(baij->rstart + i); 855 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 856 for (j=ai[i]; j<ai[i+1]; j++) { 857 col = baij->garray[aj[j]]*bs; 858 for (k=0; k<bs; k++) { 859 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 860 col++; a += bs; 861 } 862 } 863 } 864 ierr = PetscFree(rvals);CHKERRQ(ierr); 865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 867 /* 868 Everyone has to call to draw the matrix since the graphics waits are 869 synchronized across all processors that share the PetscDraw object 870 */ 871 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 872 if (!rank) { 873 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 874 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 875 } 876 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 877 ierr = MatDestroy(A);CHKERRQ(ierr); 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatView_MPISBAIJ" 884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 885 { 886 int ierr; 887 PetscTruth isascii,isdraw,issocket,isbinary; 888 889 PetscFunctionBegin; 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 894 if (isascii || isdraw || issocket || isbinary) { 895 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 896 } else { 897 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatDestroy_MPISBAIJ" 904 int MatDestroy_MPISBAIJ(Mat mat) 905 { 906 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 907 int ierr; 908 909 PetscFunctionBegin; 910 #if defined(PETSC_USE_LOG) 911 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 912 #endif 913 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 914 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 915 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 916 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 917 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 918 #if defined (PETSC_USE_CTABLE) 919 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 920 #else 921 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 922 #endif 923 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 924 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 925 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 926 if (baij->slvec0) { 927 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 928 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 929 } 930 if (baij->slvec1) { 931 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 932 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 933 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 934 } 935 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 936 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 937 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 938 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 939 #if defined(PETSC_USE_MAT_SINGLE) 940 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 941 #endif 942 ierr = PetscFree(baij);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatMult_MPISBAIJ" 948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 949 { 950 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 951 int ierr,nt,mbs=a->mbs,bs=a->bs; 952 PetscScalar *x,*from,zero=0.0; 953 954 PetscFunctionBegin; 955 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 956 if (nt != A->n) { 957 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 958 } 959 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 960 if (nt != A->m) { 961 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 962 } 963 964 /* diagonal part */ 965 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 966 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 967 968 /* subdiagonal part */ 969 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 970 971 /* copy x into the vec slvec0 */ 972 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 973 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 974 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 975 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 976 977 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 978 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 979 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 980 981 /* supperdiagonal part */ 982 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 983 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 989 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 990 { 991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 992 int ierr,nt; 993 994 PetscFunctionBegin; 995 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 996 if (nt != A->n) { 997 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 998 } 999 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1000 if (nt != A->m) { 1001 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1002 } 1003 1004 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1005 /* do diagonal part */ 1006 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* do supperdiagonal part */ 1008 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1009 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1010 /* do subdiagonal part */ 1011 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1012 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1013 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1014 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1020 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1021 { 1022 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1023 int ierr,mbs=a->mbs,bs=a->bs; 1024 PetscScalar *x,*from,zero=0.0; 1025 1026 PetscFunctionBegin; 1027 /* 1028 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1029 PetscSynchronizedFlush(A->comm); 1030 */ 1031 /* diagonal part */ 1032 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1033 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1034 1035 /* subdiagonal part */ 1036 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1037 1038 /* copy x into the vec slvec0 */ 1039 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1040 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1041 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1042 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1043 1044 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1045 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1046 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1047 1048 /* supperdiagonal part */ 1049 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1050 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1056 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1057 { 1058 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1059 int ierr; 1060 1061 PetscFunctionBegin; 1062 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1063 /* do diagonal part */ 1064 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1065 /* do supperdiagonal part */ 1066 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1067 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1068 1069 /* do subdiagonal part */ 1070 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1071 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1072 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1079 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1080 { 1081 int ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1090 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1091 { 1092 int ierr; 1093 1094 PetscFunctionBegin; 1095 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* 1100 This only works correctly for square matrices where the subblock A->A is the 1101 diagonal block 1102 */ 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1106 { 1107 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1108 int ierr; 1109 1110 PetscFunctionBegin; 1111 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1112 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatScale_MPISBAIJ" 1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1119 { 1120 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1121 int ierr; 1122 1123 PetscFunctionBegin; 1124 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1125 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1132 { 1133 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1134 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1135 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1136 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1137 int *cmap,*idx_p,cstart = mat->cstart; 1138 1139 PetscFunctionBegin; 1140 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1141 mat->getrowactive = PETSC_TRUE; 1142 1143 if (!mat->rowvalues && (idx || v)) { 1144 /* 1145 allocate enough space to hold information from the longest row. 1146 */ 1147 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1148 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1149 int max = 1,mbs = mat->mbs,tmp; 1150 for (i=0; i<mbs; i++) { 1151 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1152 if (max < tmp) { max = tmp; } 1153 } 1154 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1155 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1156 } 1157 1158 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1159 lrow = row - brstart; /* local row index */ 1160 1161 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1162 if (!v) {pvA = 0; pvB = 0;} 1163 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1164 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1165 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1166 nztot = nzA + nzB; 1167 1168 cmap = mat->garray; 1169 if (v || idx) { 1170 if (nztot) { 1171 /* Sort by increasing column numbers, assuming A and B already sorted */ 1172 int imark = -1; 1173 if (v) { 1174 *v = v_p = mat->rowvalues; 1175 for (i=0; i<nzB; i++) { 1176 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1177 else break; 1178 } 1179 imark = i; 1180 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1181 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1182 } 1183 if (idx) { 1184 *idx = idx_p = mat->rowindices; 1185 if (imark > -1) { 1186 for (i=0; i<imark; i++) { 1187 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1188 } 1189 } else { 1190 for (i=0; i<nzB; i++) { 1191 if (cmap[cworkB[i]/bs] < cstart) 1192 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1193 else break; 1194 } 1195 imark = i; 1196 } 1197 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1198 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1199 } 1200 } else { 1201 if (idx) *idx = 0; 1202 if (v) *v = 0; 1203 } 1204 } 1205 *nz = nztot; 1206 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1207 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1214 { 1215 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1216 1217 PetscFunctionBegin; 1218 if (baij->getrowactive == PETSC_FALSE) { 1219 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1220 } 1221 baij->getrowactive = PETSC_FALSE; 1222 PetscFunctionReturn(0); 1223 } 1224 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1228 { 1229 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1230 1231 PetscFunctionBegin; 1232 *bs = baij->bs; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1238 int MatZeroEntries_MPISBAIJ(Mat A) 1239 { 1240 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1245 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1252 { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1254 Mat A = a->A,B = a->B; 1255 int ierr; 1256 PetscReal isend[5],irecv[5]; 1257 1258 PetscFunctionBegin; 1259 info->block_size = (PetscReal)a->bs; 1260 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1261 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1262 isend[3] = info->memory; isend[4] = info->mallocs; 1263 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1264 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1265 isend[3] += info->memory; isend[4] += info->mallocs; 1266 if (flag == MAT_LOCAL) { 1267 info->nz_used = isend[0]; 1268 info->nz_allocated = isend[1]; 1269 info->nz_unneeded = isend[2]; 1270 info->memory = isend[3]; 1271 info->mallocs = isend[4]; 1272 } else if (flag == MAT_GLOBAL_MAX) { 1273 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1274 info->nz_used = irecv[0]; 1275 info->nz_allocated = irecv[1]; 1276 info->nz_unneeded = irecv[2]; 1277 info->memory = irecv[3]; 1278 info->mallocs = irecv[4]; 1279 } else if (flag == MAT_GLOBAL_SUM) { 1280 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1281 info->nz_used = irecv[0]; 1282 info->nz_allocated = irecv[1]; 1283 info->nz_unneeded = irecv[2]; 1284 info->memory = irecv[3]; 1285 info->mallocs = irecv[4]; 1286 } else { 1287 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1288 } 1289 info->rows_global = (PetscReal)A->M; 1290 info->columns_global = (PetscReal)A->N; 1291 info->rows_local = (PetscReal)A->m; 1292 info->columns_local = (PetscReal)A->N; 1293 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1294 info->fill_ratio_needed = 0; 1295 info->factor_mallocs = 0; 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1302 { 1303 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1304 int ierr; 1305 1306 PetscFunctionBegin; 1307 switch (op) { 1308 case MAT_NO_NEW_NONZERO_LOCATIONS: 1309 case MAT_YES_NEW_NONZERO_LOCATIONS: 1310 case MAT_COLUMNS_UNSORTED: 1311 case MAT_COLUMNS_SORTED: 1312 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1313 case MAT_KEEP_ZEROED_ROWS: 1314 case MAT_NEW_NONZERO_LOCATION_ERR: 1315 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1316 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1317 break; 1318 case MAT_ROW_ORIENTED: 1319 a->roworiented = PETSC_TRUE; 1320 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1321 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1322 break; 1323 case MAT_ROWS_SORTED: 1324 case MAT_ROWS_UNSORTED: 1325 case MAT_YES_NEW_DIAGONALS: 1326 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1327 break; 1328 case MAT_COLUMN_ORIENTED: 1329 a->roworiented = PETSC_FALSE; 1330 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1331 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1332 break; 1333 case MAT_IGNORE_OFF_PROC_ENTRIES: 1334 a->donotstash = PETSC_TRUE; 1335 break; 1336 case MAT_NO_NEW_DIAGONALS: 1337 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1338 case MAT_USE_HASH_TABLE: 1339 a->ht_flag = PETSC_TRUE; 1340 break; 1341 case MAT_NOT_SYMMETRIC: 1342 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1343 case MAT_HERMITIAN: 1344 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1345 case MAT_SYMMETRIC: 1346 case MAT_STRUCTURALLY_SYMMETRIC: 1347 case MAT_NOT_HERMITIAN: 1348 case MAT_SYMMETRY_ETERNAL: 1349 case MAT_NOT_SYMMETRY_ETERNAL: 1350 break; 1351 default: 1352 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1360 { 1361 int ierr; 1362 PetscFunctionBegin; 1363 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1370 { 1371 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1372 Mat a = baij->A,b = baij->B; 1373 int ierr,s1,s2,s3; 1374 1375 PetscFunctionBegin; 1376 if (ll != rr) { 1377 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1378 } 1379 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1380 if (rr) { 1381 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1382 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1383 /* Overlap communication with computation. */ 1384 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1385 /*} if (ll) { */ 1386 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1387 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1388 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1389 /* } */ 1390 /* scale the diagonal block */ 1391 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1392 1393 /* if (rr) { */ 1394 /* Do a scatter end and then right scale the off-diagonal block */ 1395 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1396 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1397 } 1398 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1405 { 1406 PetscFunctionBegin; 1407 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1412 int MatPrintHelp_MPISBAIJ(Mat A) 1413 { 1414 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1415 MPI_Comm comm = A->comm; 1416 static int called = 0; 1417 int ierr; 1418 1419 PetscFunctionBegin; 1420 if (!a->rank) { 1421 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1422 } 1423 if (called) {PetscFunctionReturn(0);} else called = 1; 1424 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1425 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1431 int MatSetUnfactored_MPISBAIJ(Mat A) 1432 { 1433 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatEqual_MPISBAIJ" 1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1446 { 1447 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1448 Mat a,b,c,d; 1449 PetscTruth flg; 1450 int ierr; 1451 1452 PetscFunctionBegin; 1453 a = matA->A; b = matA->B; 1454 c = matB->A; d = matB->B; 1455 1456 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1457 if (flg == PETSC_TRUE) { 1458 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1459 } 1460 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1466 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1467 { 1468 int ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1477 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1478 { 1479 int i,ierr; 1480 PetscTruth flg; 1481 1482 for (i=0; i<n; i++) { 1483 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1484 if (!flg) { 1485 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1486 } 1487 } 1488 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 1493 /* -------------------------------------------------------------------*/ 1494 static struct _MatOps MatOps_Values = { 1495 MatSetValues_MPISBAIJ, 1496 MatGetRow_MPISBAIJ, 1497 MatRestoreRow_MPISBAIJ, 1498 MatMult_MPISBAIJ, 1499 /* 4*/ MatMultAdd_MPISBAIJ, 1500 MatMultTranspose_MPISBAIJ, 1501 MatMultTransposeAdd_MPISBAIJ, 1502 0, 1503 0, 1504 0, 1505 /*10*/ 0, 1506 0, 1507 0, 1508 MatRelax_MPISBAIJ, 1509 MatTranspose_MPISBAIJ, 1510 /*15*/ MatGetInfo_MPISBAIJ, 1511 MatEqual_MPISBAIJ, 1512 MatGetDiagonal_MPISBAIJ, 1513 MatDiagonalScale_MPISBAIJ, 1514 MatNorm_MPISBAIJ, 1515 /*20*/ MatAssemblyBegin_MPISBAIJ, 1516 MatAssemblyEnd_MPISBAIJ, 1517 0, 1518 MatSetOption_MPISBAIJ, 1519 MatZeroEntries_MPISBAIJ, 1520 /*25*/ MatZeroRows_MPISBAIJ, 1521 0, 1522 0, 1523 0, 1524 0, 1525 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*35*/ MatDuplicate_MPISBAIJ, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*40*/ 0, 1536 MatGetSubMatrices_MPISBAIJ, 1537 0, 1538 MatGetValues_MPISBAIJ, 1539 0, 1540 /*45*/ MatPrintHelp_MPISBAIJ, 1541 MatScale_MPISBAIJ, 1542 0, 1543 0, 1544 0, 1545 /*50*/ MatGetBlockSize_MPISBAIJ, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*55*/ 0, 1551 0, 1552 MatSetUnfactored_MPISBAIJ, 1553 0, 1554 MatSetValuesBlocked_MPISBAIJ, 1555 /*60*/ 0, 1556 0, 1557 0, 1558 MatGetPetscMaps_Petsc, 1559 0, 1560 /*65*/ 0, 1561 0, 1562 0, 1563 0, 1564 0, 1565 /*70*/ MatGetRowMax_MPISBAIJ, 1566 0, 1567 0, 1568 0, 1569 0, 1570 /*75*/ 0, 1571 0, 1572 0, 1573 0, 1574 0, 1575 /*80*/ 0, 1576 0, 1577 0, 1578 0, 1579 /*85*/ MatLoad_MPISBAIJ 1580 }; 1581 1582 1583 EXTERN_C_BEGIN 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1586 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1587 { 1588 PetscFunctionBegin; 1589 *a = ((Mat_MPISBAIJ *)A->data)->A; 1590 *iscopy = PETSC_FALSE; 1591 PetscFunctionReturn(0); 1592 } 1593 EXTERN_C_END 1594 1595 EXTERN_C_BEGIN 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1598 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1599 { 1600 Mat_MPISBAIJ *b; 1601 int ierr,i,mbs,Mbs; 1602 1603 PetscFunctionBegin; 1604 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1605 1606 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1607 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1608 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1609 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1610 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1611 if (d_nnz) { 1612 for (i=0; i<B->m/bs; i++) { 1613 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1614 } 1615 } 1616 if (o_nnz) { 1617 for (i=0; i<B->m/bs; i++) { 1618 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1619 } 1620 } 1621 B->preallocated = PETSC_TRUE; 1622 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1623 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1624 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1625 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1626 1627 b = (Mat_MPISBAIJ*)B->data; 1628 mbs = B->m/bs; 1629 Mbs = B->M/bs; 1630 if (mbs*bs != B->m) { 1631 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1632 } 1633 1634 b->bs = bs; 1635 b->bs2 = bs*bs; 1636 b->mbs = mbs; 1637 b->nbs = mbs; 1638 b->Mbs = Mbs; 1639 b->Nbs = Mbs; 1640 1641 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1642 b->rowners[0] = 0; 1643 for (i=2; i<=b->size; i++) { 1644 b->rowners[i] += b->rowners[i-1]; 1645 } 1646 b->rstart = b->rowners[b->rank]; 1647 b->rend = b->rowners[b->rank+1]; 1648 b->cstart = b->rstart; 1649 b->cend = b->rend; 1650 for (i=0; i<=b->size; i++) { 1651 b->rowners_bs[i] = b->rowners[i]*bs; 1652 } 1653 b->rstart_bs = b-> rstart*bs; 1654 b->rend_bs = b->rend*bs; 1655 1656 b->cstart_bs = b->cstart*bs; 1657 b->cend_bs = b->cend*bs; 1658 1659 1660 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1661 PetscLogObjectParent(B,b->A); 1662 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1663 PetscLogObjectParent(B,b->B); 1664 1665 /* build cache for off array entries formed */ 1666 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1667 1668 PetscFunctionReturn(0); 1669 } 1670 EXTERN_C_END 1671 1672 /*MC 1673 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1674 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1675 1676 Options Database Keys: 1677 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1678 1679 Level: beginner 1680 1681 .seealso: MatCreateMPISBAIJ 1682 M*/ 1683 1684 EXTERN_C_BEGIN 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatCreate_MPISBAIJ" 1687 int MatCreate_MPISBAIJ(Mat B) 1688 { 1689 Mat_MPISBAIJ *b; 1690 int ierr; 1691 PetscTruth flg; 1692 1693 PetscFunctionBegin; 1694 1695 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1696 B->data = (void*)b; 1697 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1698 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1699 1700 B->ops->destroy = MatDestroy_MPISBAIJ; 1701 B->ops->view = MatView_MPISBAIJ; 1702 B->mapping = 0; 1703 B->factor = 0; 1704 B->assembled = PETSC_FALSE; 1705 1706 B->insertmode = NOT_SET_VALUES; 1707 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1708 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1709 1710 /* build local table of row and column ownerships */ 1711 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1712 b->cowners = b->rowners + b->size + 2; 1713 b->rowners_bs = b->cowners + b->size + 2; 1714 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1715 1716 /* build cache for off array entries formed */ 1717 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1718 b->donotstash = PETSC_FALSE; 1719 b->colmap = PETSC_NULL; 1720 b->garray = PETSC_NULL; 1721 b->roworiented = PETSC_TRUE; 1722 1723 #if defined(PETSC_USE_MAT_SINGLE) 1724 /* stuff for MatSetValues_XXX in single precision */ 1725 b->setvalueslen = 0; 1726 b->setvaluescopy = PETSC_NULL; 1727 #endif 1728 1729 /* stuff used in block assembly */ 1730 b->barray = 0; 1731 1732 /* stuff used for matrix vector multiply */ 1733 b->lvec = 0; 1734 b->Mvctx = 0; 1735 b->slvec0 = 0; 1736 b->slvec0b = 0; 1737 b->slvec1 = 0; 1738 b->slvec1a = 0; 1739 b->slvec1b = 0; 1740 b->sMvctx = 0; 1741 1742 /* stuff for MatGetRow() */ 1743 b->rowindices = 0; 1744 b->rowvalues = 0; 1745 b->getrowactive = PETSC_FALSE; 1746 1747 /* hash table stuff */ 1748 b->ht = 0; 1749 b->hd = 0; 1750 b->ht_size = 0; 1751 b->ht_flag = PETSC_FALSE; 1752 b->ht_fact = 0; 1753 b->ht_total_ct = 0; 1754 b->ht_insert_ct = 0; 1755 1756 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1757 if (flg) { 1758 PetscReal fact = 1.39; 1759 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1760 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1761 if (fact <= 1.0) fact = 1.39; 1762 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1763 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1764 } 1765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1766 "MatStoreValues_MPISBAIJ", 1767 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1769 "MatRetrieveValues_MPISBAIJ", 1770 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1772 "MatGetDiagonalBlock_MPISBAIJ", 1773 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1775 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1776 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 EXTERN_C_END 1780 1781 /*MC 1782 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1783 1784 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1785 and MATMPISBAIJ otherwise. 1786 1787 Options Database Keys: 1788 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1789 1790 Level: beginner 1791 1792 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1793 M*/ 1794 1795 EXTERN_C_BEGIN 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatCreate_SBAIJ" 1798 int MatCreate_SBAIJ(Mat A) { 1799 int ierr,size; 1800 1801 PetscFunctionBegin; 1802 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1803 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1804 if (size == 1) { 1805 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1808 } 1809 PetscFunctionReturn(0); 1810 } 1811 EXTERN_C_END 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1815 /*@C 1816 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1817 the user should preallocate the matrix storage by setting the parameters 1818 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1819 performance can be increased by more than a factor of 50. 1820 1821 Collective on Mat 1822 1823 Input Parameters: 1824 + A - the matrix 1825 . bs - size of blockk 1826 . d_nz - number of block nonzeros per block row in diagonal portion of local 1827 submatrix (same for all local rows) 1828 . d_nnz - array containing the number of block nonzeros in the various block rows 1829 in the upper triangular and diagonal part of the in diagonal portion of the local 1830 (possibly different for each block row) or PETSC_NULL. You must leave room 1831 for the diagonal entry even if it is zero. 1832 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1833 submatrix (same for all local rows). 1834 - o_nnz - array containing the number of nonzeros in the various block rows of the 1835 off-diagonal portion of the local submatrix (possibly different for 1836 each block row) or PETSC_NULL. 1837 1838 1839 Options Database Keys: 1840 . -mat_no_unroll - uses code that does not unroll the loops in the 1841 block calculations (much slower) 1842 . -mat_block_size - size of the blocks to use 1843 1844 Notes: 1845 1846 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1847 than it must be used on all processors that share the object for that argument. 1848 1849 Storage Information: 1850 For a square global matrix we define each processor's diagonal portion 1851 to be its local rows and the corresponding columns (a square submatrix); 1852 each processor's off-diagonal portion encompasses the remainder of the 1853 local matrix (a rectangular submatrix). 1854 1855 The user can specify preallocated storage for the diagonal part of 1856 the local submatrix with either d_nz or d_nnz (not both). Set 1857 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1858 memory allocation. Likewise, specify preallocated storage for the 1859 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1860 1861 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1862 the figure below we depict these three local rows and all columns (0-11). 1863 1864 .vb 1865 0 1 2 3 4 5 6 7 8 9 10 11 1866 ------------------- 1867 row 3 | o o o d d d o o o o o o 1868 row 4 | o o o d d d o o o o o o 1869 row 5 | o o o d d d o o o o o o 1870 ------------------- 1871 .ve 1872 1873 Thus, any entries in the d locations are stored in the d (diagonal) 1874 submatrix, and any entries in the o locations are stored in the 1875 o (off-diagonal) submatrix. Note that the d matrix is stored in 1876 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1877 1878 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1879 plus the diagonal part of the d matrix, 1880 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1881 In general, for PDE problems in which most nonzeros are near the diagonal, 1882 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1883 or you will get TERRIBLE performance; see the users' manual chapter on 1884 matrices. 1885 1886 Level: intermediate 1887 1888 .keywords: matrix, block, aij, compressed row, sparse, parallel 1889 1890 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1891 @*/ 1892 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1893 { 1894 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1895 1896 PetscFunctionBegin; 1897 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1898 if (f) { 1899 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatCreateMPISBAIJ" 1906 /*@C 1907 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1908 (block compressed row). For good matrix assembly performance 1909 the user should preallocate the matrix storage by setting the parameters 1910 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1911 performance can be increased by more than a factor of 50. 1912 1913 Collective on MPI_Comm 1914 1915 Input Parameters: 1916 + comm - MPI communicator 1917 . bs - size of blockk 1918 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1919 This value should be the same as the local size used in creating the 1920 y vector for the matrix-vector product y = Ax. 1921 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1922 This value should be the same as the local size used in creating the 1923 x vector for the matrix-vector product y = Ax. 1924 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1925 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1926 . d_nz - number of block nonzeros per block row in diagonal portion of local 1927 submatrix (same for all local rows) 1928 . d_nnz - array containing the number of block nonzeros in the various block rows 1929 in the upper triangular portion of the in diagonal portion of the local 1930 (possibly different for each block block row) or PETSC_NULL. 1931 You must leave room for the diagonal entry even if it is zero. 1932 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1933 submatrix (same for all local rows). 1934 - o_nnz - array containing the number of nonzeros in the various block rows of the 1935 off-diagonal portion of the local submatrix (possibly different for 1936 each block row) or PETSC_NULL. 1937 1938 Output Parameter: 1939 . A - the matrix 1940 1941 Options Database Keys: 1942 . -mat_no_unroll - uses code that does not unroll the loops in the 1943 block calculations (much slower) 1944 . -mat_block_size - size of the blocks to use 1945 . -mat_mpi - use the parallel matrix data structures even on one processor 1946 (defaults to using SeqBAIJ format on one processor) 1947 1948 Notes: 1949 The user MUST specify either the local or global matrix dimensions 1950 (possibly both). 1951 1952 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1953 than it must be used on all processors that share the object for that argument. 1954 1955 Storage Information: 1956 For a square global matrix we define each processor's diagonal portion 1957 to be its local rows and the corresponding columns (a square submatrix); 1958 each processor's off-diagonal portion encompasses the remainder of the 1959 local matrix (a rectangular submatrix). 1960 1961 The user can specify preallocated storage for the diagonal part of 1962 the local submatrix with either d_nz or d_nnz (not both). Set 1963 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1964 memory allocation. Likewise, specify preallocated storage for the 1965 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1966 1967 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1968 the figure below we depict these three local rows and all columns (0-11). 1969 1970 .vb 1971 0 1 2 3 4 5 6 7 8 9 10 11 1972 ------------------- 1973 row 3 | o o o d d d o o o o o o 1974 row 4 | o o o d d d o o o o o o 1975 row 5 | o o o d d d o o o o o o 1976 ------------------- 1977 .ve 1978 1979 Thus, any entries in the d locations are stored in the d (diagonal) 1980 submatrix, and any entries in the o locations are stored in the 1981 o (off-diagonal) submatrix. Note that the d matrix is stored in 1982 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1983 1984 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1985 plus the diagonal part of the d matrix, 1986 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1987 In general, for PDE problems in which most nonzeros are near the diagonal, 1988 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1989 or you will get TERRIBLE performance; see the users' manual chapter on 1990 matrices. 1991 1992 Level: intermediate 1993 1994 .keywords: matrix, block, aij, compressed row, sparse, parallel 1995 1996 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1997 @*/ 1998 1999 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2000 { 2001 int ierr,size; 2002 2003 PetscFunctionBegin; 2004 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2005 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2006 if (size > 1) { 2007 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2008 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2009 } else { 2010 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2011 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2019 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2020 { 2021 Mat mat; 2022 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2023 int ierr,len=0; 2024 2025 PetscFunctionBegin; 2026 *newmat = 0; 2027 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2028 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2029 /* ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); -- cause error? */ 2030 mat->factor = matin->factor; 2031 mat->preallocated = PETSC_TRUE; 2032 mat->assembled = PETSC_TRUE; 2033 a = (Mat_MPISBAIJ*)mat->data; 2034 a->bs = oldmat->bs; 2035 a->bs2 = oldmat->bs2; 2036 a->mbs = oldmat->mbs; 2037 a->nbs = oldmat->nbs; 2038 a->Mbs = oldmat->Mbs; 2039 a->Nbs = oldmat->Nbs; 2040 2041 a->rstart = oldmat->rstart; 2042 a->rend = oldmat->rend; 2043 a->cstart = oldmat->cstart; 2044 a->cend = oldmat->cend; 2045 a->size = oldmat->size; 2046 a->rank = oldmat->rank; 2047 a->donotstash = oldmat->donotstash; 2048 a->roworiented = oldmat->roworiented; 2049 a->rowindices = 0; 2050 a->rowvalues = 0; 2051 a->getrowactive = PETSC_FALSE; 2052 a->barray = 0; 2053 a->rstart_bs = oldmat->rstart_bs; 2054 a->rend_bs = oldmat->rend_bs; 2055 a->cstart_bs = oldmat->cstart_bs; 2056 a->cend_bs = oldmat->cend_bs; 2057 2058 /* hash table stuff */ 2059 a->ht = 0; 2060 a->hd = 0; 2061 a->ht_size = 0; 2062 a->ht_flag = oldmat->ht_flag; 2063 a->ht_fact = oldmat->ht_fact; 2064 a->ht_total_ct = 0; 2065 a->ht_insert_ct = 0; 2066 2067 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2068 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2069 a->cowners = a->rowners + a->size + 2; 2070 a->rowners_bs = a->cowners + a->size + 2; 2071 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2072 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2073 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2074 if (oldmat->colmap) { 2075 #if defined (PETSC_USE_CTABLE) 2076 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2077 #else 2078 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2079 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2080 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2081 #endif 2082 } else a->colmap = 0; 2083 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2084 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2085 PetscLogObjectMemory(mat,len*sizeof(int)); 2086 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2087 } else a->garray = 0; 2088 2089 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2090 PetscLogObjectParent(mat,a->lvec); 2091 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2092 PetscLogObjectParent(mat,a->Mvctx); 2093 2094 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2095 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2096 ierr = VecDuplicate(oldmat->slvec0b,&a->slvec0b);CHKERRQ(ierr); 2097 ierr = VecDuplicate(oldmat->slvec1a,&a->slvec1a);CHKERRQ(ierr); 2098 ierr = VecDuplicate(oldmat->slvec1b,&a->slvec1b);CHKERRQ(ierr); 2099 ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx);CHKERRQ(ierr); 2100 2101 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2102 PetscLogObjectParent(mat,a->A); 2103 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2104 PetscLogObjectParent(mat,a->B); 2105 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2106 *newmat = mat; 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #include "petscsys.h" 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatLoad_MPISBAIJ" 2114 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2115 { 2116 Mat A; 2117 int i,nz,ierr,j,rstart,rend,fd; 2118 PetscScalar *vals,*buf; 2119 MPI_Comm comm = ((PetscObject)viewer)->comm; 2120 MPI_Status status; 2121 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2122 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2123 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2124 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2125 int dcount,kmax,k,nzcount,tmp; 2126 2127 PetscFunctionBegin; 2128 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2129 2130 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2131 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2132 if (!rank) { 2133 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2134 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2135 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2136 if (header[3] < 0) { 2137 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2138 } 2139 } 2140 2141 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2142 M = header[1]; N = header[2]; 2143 2144 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2145 2146 /* 2147 This code adds extra rows to make sure the number of rows is 2148 divisible by the blocksize 2149 */ 2150 Mbs = M/bs; 2151 extra_rows = bs - M + bs*(Mbs); 2152 if (extra_rows == bs) extra_rows = 0; 2153 else Mbs++; 2154 if (extra_rows &&!rank) { 2155 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2156 } 2157 2158 /* determine ownership of all rows */ 2159 mbs = Mbs/size + ((Mbs % size) > rank); 2160 m = mbs*bs; 2161 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2162 browners = rowners + size + 1; 2163 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2164 rowners[0] = 0; 2165 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2166 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2167 rstart = rowners[rank]; 2168 rend = rowners[rank+1]; 2169 2170 /* distribute row lengths to all processors */ 2171 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2172 if (!rank) { 2173 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2174 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2175 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2176 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2177 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2178 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2179 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2180 } else { 2181 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2182 } 2183 2184 if (!rank) { /* procs[0] */ 2185 /* calculate the number of nonzeros on each processor */ 2186 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2187 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2188 for (i=0; i<size; i++) { 2189 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2190 procsnz[i] += rowlengths[j]; 2191 } 2192 } 2193 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2194 2195 /* determine max buffer needed and allocate it */ 2196 maxnz = 0; 2197 for (i=0; i<size; i++) { 2198 maxnz = PetscMax(maxnz,procsnz[i]); 2199 } 2200 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2201 2202 /* read in my part of the matrix column indices */ 2203 nz = procsnz[0]; 2204 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2205 mycols = ibuf; 2206 if (size == 1) nz -= extra_rows; 2207 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2208 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2209 2210 /* read in every ones (except the last) and ship off */ 2211 for (i=1; i<size-1; i++) { 2212 nz = procsnz[i]; 2213 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2214 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2215 } 2216 /* read in the stuff for the last proc */ 2217 if (size != 1) { 2218 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2219 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2220 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2221 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2222 } 2223 ierr = PetscFree(cols);CHKERRQ(ierr); 2224 } else { /* procs[i], i>0 */ 2225 /* determine buffer space needed for message */ 2226 nz = 0; 2227 for (i=0; i<m; i++) { 2228 nz += locrowlens[i]; 2229 } 2230 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2231 mycols = ibuf; 2232 /* receive message of column indices*/ 2233 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2234 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2235 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2236 } 2237 2238 /* loop over local rows, determining number of off diagonal entries */ 2239 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2240 odlens = dlens + (rend-rstart); 2241 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2242 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2243 masked1 = mask + Mbs; 2244 masked2 = masked1 + Mbs; 2245 rowcount = 0; nzcount = 0; 2246 for (i=0; i<mbs; i++) { 2247 dcount = 0; 2248 odcount = 0; 2249 for (j=0; j<bs; j++) { 2250 kmax = locrowlens[rowcount]; 2251 for (k=0; k<kmax; k++) { 2252 tmp = mycols[nzcount++]/bs; /* block col. index */ 2253 if (!mask[tmp]) { 2254 mask[tmp] = 1; 2255 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2256 else masked1[dcount++] = tmp; /* entry in diag portion */ 2257 } 2258 } 2259 rowcount++; 2260 } 2261 2262 dlens[i] = dcount; /* d_nzz[i] */ 2263 odlens[i] = odcount; /* o_nzz[i] */ 2264 2265 /* zero out the mask elements we set */ 2266 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2267 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2268 } 2269 2270 /* create our matrix */ 2271 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2272 ierr = MatSetType(A,type);CHKERRQ(ierr); 2273 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2274 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2275 2276 if (!rank) { 2277 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2278 /* read in my part of the matrix numerical values */ 2279 nz = procsnz[0]; 2280 vals = buf; 2281 mycols = ibuf; 2282 if (size == 1) nz -= extra_rows; 2283 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2284 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2285 2286 /* insert into matrix */ 2287 jj = rstart*bs; 2288 for (i=0; i<m; i++) { 2289 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2290 mycols += locrowlens[i]; 2291 vals += locrowlens[i]; 2292 jj++; 2293 } 2294 2295 /* read in other processors (except the last one) and ship out */ 2296 for (i=1; i<size-1; i++) { 2297 nz = procsnz[i]; 2298 vals = buf; 2299 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2300 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2301 } 2302 /* the last proc */ 2303 if (size != 1){ 2304 nz = procsnz[i] - extra_rows; 2305 vals = buf; 2306 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2307 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2308 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2309 } 2310 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2311 2312 } else { 2313 /* receive numeric values */ 2314 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2315 2316 /* receive message of values*/ 2317 vals = buf; 2318 mycols = ibuf; 2319 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2320 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2321 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2322 2323 /* insert into matrix */ 2324 jj = rstart*bs; 2325 for (i=0; i<m; i++) { 2326 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2327 mycols += locrowlens[i]; 2328 vals += locrowlens[i]; 2329 jj++; 2330 } 2331 } 2332 2333 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2334 ierr = PetscFree(buf);CHKERRQ(ierr); 2335 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2336 ierr = PetscFree(rowners);CHKERRQ(ierr); 2337 ierr = PetscFree(dlens);CHKERRQ(ierr); 2338 ierr = PetscFree(mask);CHKERRQ(ierr); 2339 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 *newmat = A; 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2347 /*@ 2348 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2349 2350 Input Parameters: 2351 . mat - the matrix 2352 . fact - factor 2353 2354 Collective on Mat 2355 2356 Level: advanced 2357 2358 Notes: 2359 This can also be set by the command line option: -mat_use_hash_table fact 2360 2361 .keywords: matrix, hashtable, factor, HT 2362 2363 .seealso: MatSetOption() 2364 @*/ 2365 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2366 { 2367 PetscFunctionBegin; 2368 SETERRQ(1,"Function not yet written for SBAIJ format"); 2369 /* PetscFunctionReturn(0); */ 2370 } 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2374 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2375 { 2376 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2377 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2378 PetscReal atmp; 2379 PetscReal *work,*svalues,*rvalues; 2380 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2381 int rank,size,*rowners_bs,dest,count,source; 2382 PetscScalar *va; 2383 MatScalar *ba; 2384 MPI_Status stat; 2385 2386 PetscFunctionBegin; 2387 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2388 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2389 2390 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2391 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2392 2393 bs = a->bs; 2394 mbs = a->mbs; 2395 Mbs = a->Mbs; 2396 ba = b->a; 2397 bi = b->i; 2398 bj = b->j; 2399 /* 2400 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2401 PetscSynchronizedFlush(A->comm); 2402 */ 2403 2404 /* find ownerships */ 2405 rowners_bs = a->rowners_bs; 2406 /* 2407 if (!rank){ 2408 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2409 } 2410 */ 2411 2412 /* each proc creates an array to be distributed */ 2413 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2414 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2415 2416 /* row_max for B */ 2417 if (rank != size-1){ 2418 for (i=0; i<mbs; i++) { 2419 ncols = bi[1] - bi[0]; bi++; 2420 brow = bs*i; 2421 for (j=0; j<ncols; j++){ 2422 bcol = bs*(*bj); 2423 for (kcol=0; kcol<bs; kcol++){ 2424 col = bcol + kcol; /* local col index */ 2425 col += rowners_bs[rank+1]; /* global col index */ 2426 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2427 for (krow=0; krow<bs; krow++){ 2428 atmp = PetscAbsScalar(*ba); ba++; 2429 row = brow + krow; /* local row index */ 2430 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2431 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2432 if (work[col] < atmp) work[col] = atmp; 2433 } 2434 } 2435 bj++; 2436 } 2437 } 2438 /* 2439 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2440 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2441 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2442 */ 2443 2444 /* send values to its owners */ 2445 for (dest=rank+1; dest<size; dest++){ 2446 svalues = work + rowners_bs[dest]; 2447 count = rowners_bs[dest+1]-rowners_bs[dest]; 2448 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2449 /* 2450 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2451 PetscSynchronizedFlush(A->comm); 2452 */ 2453 } 2454 } 2455 2456 /* receive values */ 2457 if (rank){ 2458 rvalues = work; 2459 count = rowners_bs[rank+1]-rowners_bs[rank]; 2460 for (source=0; source<rank; source++){ 2461 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2462 /* process values */ 2463 for (i=0; i<count; i++){ 2464 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2465 } 2466 /* 2467 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2468 PetscSynchronizedFlush(A->comm); 2469 */ 2470 } 2471 } 2472 2473 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2474 ierr = PetscFree(work);CHKERRQ(ierr); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatRelax_MPISBAIJ" 2480 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2481 { 2482 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2483 int ierr,mbs=mat->mbs,bs=mat->bs; 2484 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2485 Vec bb1; 2486 2487 PetscFunctionBegin; 2488 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2489 if (bs > 1) 2490 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2491 2492 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2493 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2494 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2495 its--; 2496 } 2497 2498 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2499 while (its--){ 2500 2501 /* lower triangular part: slvec0b = - B^T*xx */ 2502 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2503 2504 /* copy xx into slvec0a */ 2505 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2506 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2507 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2508 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2509 2510 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2511 2512 /* copy bb into slvec1a */ 2513 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2514 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2515 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2516 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2517 2518 /* set slvec1b = 0 */ 2519 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2520 2521 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2522 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2523 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2524 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2525 2526 /* upper triangular part: bb1 = bb1 - B*x */ 2527 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2528 2529 /* local diagonal sweep */ 2530 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2531 } 2532 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2533 } else { 2534 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2535 } 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2541 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2542 { 2543 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2544 int ierr; 2545 PetscScalar mone=-1.0; 2546 Vec lvec1,bb1; 2547 2548 PetscFunctionBegin; 2549 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2550 if (mat->bs > 1) 2551 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2552 2553 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2554 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2555 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2556 its--; 2557 } 2558 2559 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2560 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2561 while (its--){ 2562 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2563 2564 /* lower diagonal part: bb1 = bb - B^T*xx */ 2565 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2566 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2567 2568 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2569 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2570 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2571 2572 /* upper diagonal part: bb1 = bb1 - B*x */ 2573 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2574 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2575 2576 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2577 2578 /* diagonal sweep */ 2579 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2580 } 2581 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2582 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2583 } else { 2584 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2585 } 2586 PetscFunctionReturn(0); 2587 } 2588 2589