1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 #include "mpisbaij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 6 extern int MatSetUpMultiply_MPISBAIJ(Mat); 7 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 8 extern int DisAssemble_MPISBAIJ(Mat); 9 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int); 10 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 11 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 22 23 /* UGLY, ugly, ugly 24 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 25 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 26 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 27 converts the entries into single precision and then calls ..._MatScalar() to put them 28 into the single precision data structures. 29 */ 30 #if defined(PETSC_USE_MAT_SINGLE) 31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 36 #else 37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 38 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 40 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 42 #endif 43 44 EXTERN_C_BEGIN 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 47 int MatStoreValues_MPISBAIJ(Mat mat) 48 { 49 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 50 int ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 EXTERN_C_END 58 59 EXTERN_C_BEGIN 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 62 int MatRetrieveValues_MPISBAIJ(Mat mat) 63 { 64 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 65 int ierr; 66 67 PetscFunctionBegin; 68 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 69 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 74 75 #define CHUNKSIZE 10 76 77 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 78 { \ 79 \ 80 brow = row/bs; \ 81 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 82 rmax = aimax[brow]; nrow = ailen[brow]; \ 83 bcol = col/bs; \ 84 ridx = row % bs; cidx = col % bs; \ 85 low = 0; high = nrow; \ 86 while (high-low > 3) { \ 87 t = (low+high)/2; \ 88 if (rp[t] > bcol) high = t; \ 89 else low = t; \ 90 } \ 91 for (_i=low; _i<high; _i++) { \ 92 if (rp[_i] > bcol) break; \ 93 if (rp[_i] == bcol) { \ 94 bap = ap + bs2*_i + bs*cidx + ridx; \ 95 if (addv == ADD_VALUES) *bap += value; \ 96 else *bap = value; \ 97 goto a_noinsert; \ 98 } \ 99 } \ 100 if (a->nonew == 1) goto a_noinsert; \ 101 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 102 if (nrow >= rmax) { \ 103 /* there is no extra room in row, therefore enlarge */ \ 104 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 105 MatScalar *new_a; \ 106 \ 107 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 108 \ 109 /* malloc new storage space */ \ 110 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 111 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 112 new_j = (int*)(new_a + bs2*new_nz); \ 113 new_i = new_j + new_nz; \ 114 \ 115 /* copy over old data into new slots */ \ 116 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 117 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 118 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 119 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 120 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 121 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 124 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 125 /* free up old matrix storage */ \ 126 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 127 if (!a->singlemalloc) { \ 128 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 129 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 130 } \ 131 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 132 a->singlemalloc = PETSC_TRUE; \ 133 \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 136 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 137 a->maxnz += bs2*CHUNKSIZE; \ 138 a->reallocs++; \ 139 a->nz++; \ 140 } \ 141 N = nrow++ - 1; \ 142 /* shift up all the later entries in this row */ \ 143 for (ii=N; ii>=_i; ii--) { \ 144 rp[ii+1] = rp[ii]; \ 145 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 146 } \ 147 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 148 rp[_i] = bcol; \ 149 ap[bs2*_i + bs*cidx + ridx] = value; \ 150 a_noinsert:; \ 151 ailen[brow] = nrow; \ 152 } 153 #ifndef MatSetValues_SeqBAIJ_B_Private 154 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 155 { \ 156 brow = row/bs; \ 157 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 158 rmax = bimax[brow]; nrow = bilen[brow]; \ 159 bcol = col/bs; \ 160 ridx = row % bs; cidx = col % bs; \ 161 low = 0; high = nrow; \ 162 while (high-low > 3) { \ 163 t = (low+high)/2; \ 164 if (rp[t] > bcol) high = t; \ 165 else low = t; \ 166 } \ 167 for (_i=low; _i<high; _i++) { \ 168 if (rp[_i] > bcol) break; \ 169 if (rp[_i] == bcol) { \ 170 bap = ap + bs2*_i + bs*cidx + ridx; \ 171 if (addv == ADD_VALUES) *bap += value; \ 172 else *bap = value; \ 173 goto b_noinsert; \ 174 } \ 175 } \ 176 if (b->nonew == 1) goto b_noinsert; \ 177 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 178 if (nrow >= rmax) { \ 179 /* there is no extra room in row, therefore enlarge */ \ 180 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 181 MatScalar *new_a; \ 182 \ 183 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 184 \ 185 /* malloc new storage space */ \ 186 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 187 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 188 new_j = (int*)(new_a + bs2*new_nz); \ 189 new_i = new_j + new_nz; \ 190 \ 191 /* copy over old data into new slots */ \ 192 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 193 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 194 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 195 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 196 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 197 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 200 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 201 /* free up old matrix storage */ \ 202 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 203 if (!b->singlemalloc) { \ 204 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 205 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 206 } \ 207 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 208 b->singlemalloc = PETSC_TRUE; \ 209 \ 210 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 211 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 212 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 213 b->maxnz += bs2*CHUNKSIZE; \ 214 b->reallocs++; \ 215 b->nz++; \ 216 } \ 217 N = nrow++ - 1; \ 218 /* shift up all the later entries in this row */ \ 219 for (ii=N; ii>=_i; ii--) { \ 220 rp[ii+1] = rp[ii]; \ 221 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 222 } \ 223 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 224 rp[_i] = bcol; \ 225 ap[bs2*_i + bs*cidx + ridx] = value; \ 226 b_noinsert:; \ 227 bilen[brow] = nrow; \ 228 } 229 #endif 230 231 #if defined(PETSC_USE_MAT_SINGLE) 232 #undef __FUNCT__ 233 #define __FUNCT__ "MatSetValues_MPISBAIJ" 234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 235 { 236 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 237 int ierr,i,N = m*n; 238 MatScalar *vsingle; 239 240 PetscFunctionBegin; 241 if (N > b->setvalueslen) { 242 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 243 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 244 b->setvalueslen = N; 245 } 246 vsingle = b->setvaluescopy; 247 248 for (i=0; i<N; i++) { 249 vsingle[i] = v[i]; 250 } 251 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 257 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 258 { 259 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 260 int ierr,i,N = m*n*b->bs2; 261 MatScalar *vsingle; 262 263 PetscFunctionBegin; 264 if (N > b->setvalueslen) { 265 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 266 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 267 b->setvalueslen = N; 268 } 269 vsingle = b->setvaluescopy; 270 for (i=0; i<N; i++) { 271 vsingle[i] = v[i]; 272 } 273 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 279 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 280 { 281 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 282 int ierr,i,N = m*n; 283 MatScalar *vsingle; 284 285 PetscFunctionBegin; 286 SETERRQ(1,"Function not yet written for SBAIJ format"); 287 /* PetscFunctionReturn(0); */ 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 292 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n*b->bs2; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 #endif 303 304 /* Only add/insert a(i,j) with i<=j (blocks). 305 Any a(i,j) with i>j input by user is ingored. 306 */ 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 309 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 310 { 311 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 312 MatScalar value; 313 PetscTruth roworiented = baij->roworiented; 314 int ierr,i,j,row,col; 315 int rstart_orig=baij->rstart_bs; 316 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 317 int cend_orig=baij->cend_bs,bs=baij->bs; 318 319 /* Some Variables required in the macro */ 320 Mat A = baij->A; 321 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 322 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 323 MatScalar *aa=a->a; 324 325 Mat B = baij->B; 326 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 327 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 328 MatScalar *ba=b->a; 329 330 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 331 int low,high,t,ridx,cidx,bs2=a->bs2; 332 MatScalar *ap,*bap; 333 334 /* for stash */ 335 int n_loc, *in_loc=0; 336 MatScalar *v_loc=0; 337 338 PetscFunctionBegin; 339 340 if(!baij->donotstash){ 341 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 342 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 343 } 344 345 for (i=0; i<m; i++) { 346 if (im[i] < 0) continue; 347 #if defined(PETSC_USE_BOPT_g) 348 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 349 #endif 350 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 351 row = im[i] - rstart_orig; /* local row index */ 352 for (j=0; j<n; j++) { 353 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 354 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 355 col = in[j] - cstart_orig; /* local col index */ 356 brow = row/bs; bcol = col/bs; 357 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 358 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 359 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 360 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 361 } else if (in[j] < 0) continue; 362 #if defined(PETSC_USE_BOPT_g) 363 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 364 #endif 365 else { /* off-diag entry (B) */ 366 if (mat->was_assembled) { 367 if (!baij->colmap) { 368 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 369 } 370 #if defined (PETSC_USE_CTABLE) 371 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 372 col = col - 1; 373 #else 374 col = baij->colmap[in[j]/bs] - 1; 375 #endif 376 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 377 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 378 col = in[j]; 379 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 380 B = baij->B; 381 b = (Mat_SeqBAIJ*)(B)->data; 382 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 383 ba=b->a; 384 } else col += in[j]%bs; 385 } else col = in[j]; 386 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 387 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 388 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 389 } 390 } 391 } else { /* off processor entry */ 392 if (!baij->donotstash) { 393 n_loc = 0; 394 for (j=0; j<n; j++){ 395 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 396 in_loc[n_loc] = in[j]; 397 if (roworiented) { 398 v_loc[n_loc] = v[i*n+j]; 399 } else { 400 v_loc[n_loc] = v[j*m+i]; 401 } 402 n_loc++; 403 } 404 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 405 } 406 } 407 } 408 409 if(!baij->donotstash){ 410 ierr = PetscFree(in_loc);CHKERRQ(ierr); 411 ierr = PetscFree(v_loc);CHKERRQ(ierr); 412 } 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 419 { 420 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 421 const MatScalar *value; 422 MatScalar *barray=baij->barray; 423 PetscTruth roworiented = baij->roworiented; 424 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 425 int rend=baij->rend,cstart=baij->cstart,stepval; 426 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 427 428 PetscFunctionBegin; 429 if(!barray) { 430 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 431 baij->barray = barray; 432 } 433 434 if (roworiented) { 435 stepval = (n-1)*bs; 436 } else { 437 stepval = (m-1)*bs; 438 } 439 for (i=0; i<m; i++) { 440 if (im[i] < 0) continue; 441 #if defined(PETSC_USE_BOPT_g) 442 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 443 #endif 444 if (im[i] >= rstart && im[i] < rend) { 445 row = im[i] - rstart; 446 for (j=0; j<n; j++) { 447 /* If NumCol = 1 then a copy is not required */ 448 if ((roworiented) && (n == 1)) { 449 barray = (MatScalar*) v + i*bs2; 450 } else if((!roworiented) && (m == 1)) { 451 barray = (MatScalar*) v + j*bs2; 452 } else { /* Here a copy is required */ 453 if (roworiented) { 454 value = v + i*(stepval+bs)*bs + j*bs; 455 } else { 456 value = v + j*(stepval+bs)*bs + i*bs; 457 } 458 for (ii=0; ii<bs; ii++,value+=stepval) { 459 for (jj=0; jj<bs; jj++) { 460 *barray++ = *value++; 461 } 462 } 463 barray -=bs2; 464 } 465 466 if (in[j] >= cstart && in[j] < cend){ 467 col = in[j] - cstart; 468 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 469 } 470 else if (in[j] < 0) continue; 471 #if defined(PETSC_USE_BOPT_g) 472 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 473 #endif 474 else { 475 if (mat->was_assembled) { 476 if (!baij->colmap) { 477 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 478 } 479 480 #if defined(PETSC_USE_BOPT_g) 481 #if defined (PETSC_USE_CTABLE) 482 { int data; 483 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 484 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 485 } 486 #else 487 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 488 #endif 489 #endif 490 #if defined (PETSC_USE_CTABLE) 491 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 492 col = (col - 1)/bs; 493 #else 494 col = (baij->colmap[in[j]] - 1)/bs; 495 #endif 496 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 497 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 498 col = in[j]; 499 } 500 } 501 else col = in[j]; 502 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 503 } 504 } 505 } else { 506 if (!baij->donotstash) { 507 if (roworiented) { 508 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 509 } else { 510 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 511 } 512 } 513 } 514 } 515 PetscFunctionReturn(0); 516 } 517 518 #define HASH_KEY 0.6180339887 519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 525 { 526 PetscFunctionBegin; 527 SETERRQ(1,"Function not yet written for SBAIJ format"); 528 /* PetscFunctionReturn(0); */ 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 534 { 535 PetscFunctionBegin; 536 SETERRQ(1,"Function not yet written for SBAIJ format"); 537 /* PetscFunctionReturn(0); */ 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatGetValues_MPISBAIJ" 542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 543 { 544 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 545 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 546 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 547 548 PetscFunctionBegin; 549 for (i=0; i<m; i++) { 550 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 551 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 552 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 553 row = idxm[i] - bsrstart; 554 for (j=0; j<n; j++) { 555 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 556 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 557 if (idxn[j] >= bscstart && idxn[j] < bscend){ 558 col = idxn[j] - bscstart; 559 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 560 } else { 561 if (!baij->colmap) { 562 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 563 } 564 #if defined (PETSC_USE_CTABLE) 565 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 566 data --; 567 #else 568 data = baij->colmap[idxn[j]/bs]-1; 569 #endif 570 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 571 else { 572 col = data + idxn[j]%bs; 573 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 574 } 575 } 576 } 577 } else { 578 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 579 } 580 } 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatNorm_MPISBAIJ" 586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 587 { 588 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 589 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 590 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 591 int ierr; 592 PetscReal sum[2],*lnorm2; 593 594 PetscFunctionBegin; 595 if (baij->size == 1) { 596 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 597 } else { 598 if (type == NORM_FROBENIUS) { 599 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 600 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 601 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 602 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 603 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 604 /* 605 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 606 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 607 */ 608 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 609 /* 610 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 611 PetscSynchronizedFlush(mat->comm); */ 612 613 *norm = sqrt(sum[0] + 2*sum[1]); 614 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 615 } else { 616 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 /* 623 Creates the hash table, and sets the table 624 This table is created only once. 625 If new entried need to be added to the matrix 626 then the hash table has to be destroyed and 627 recreated. 628 */ 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 631 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 632 { 633 PetscFunctionBegin; 634 SETERRQ(1,"Function not yet written for SBAIJ format"); 635 /* PetscFunctionReturn(0); */ 636 } 637 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 640 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 641 { 642 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 643 int ierr,nstash,reallocs; 644 InsertMode addv; 645 646 PetscFunctionBegin; 647 if (baij->donotstash) { 648 PetscFunctionReturn(0); 649 } 650 651 /* make sure all processors are either in INSERTMODE or ADDMODE */ 652 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 653 if (addv == (ADD_VALUES|INSERT_VALUES)) { 654 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 655 } 656 mat->insertmode = addv; /* in case this processor had no cache */ 657 658 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 659 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 660 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 661 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 662 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 663 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 669 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 670 { 671 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 672 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 673 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 674 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 675 int *row,*col,other_disassembled; 676 PetscTruth r1,r2,r3; 677 MatScalar *val; 678 InsertMode addv = mat->insertmode; 679 680 PetscFunctionBegin; 681 682 if (!baij->donotstash) { 683 while (1) { 684 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 685 /* 686 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 687 PetscSynchronizedFlush(mat->comm); 688 */ 689 if (!flg) break; 690 691 for (i=0; i<n;) { 692 /* Now identify the consecutive vals belonging to the same row */ 693 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 694 if (j < n) ncols = j-i; 695 else ncols = n-i; 696 /* Now assemble all these values with a single function call */ 697 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 698 i = j; 699 } 700 } 701 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 702 /* Now process the block-stash. Since the values are stashed column-oriented, 703 set the roworiented flag to column oriented, and after MatSetValues() 704 restore the original flags */ 705 r1 = baij->roworiented; 706 r2 = a->roworiented; 707 r3 = b->roworiented; 708 baij->roworiented = PETSC_FALSE; 709 a->roworiented = PETSC_FALSE; 710 b->roworiented = PETSC_FALSE; 711 while (1) { 712 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 713 if (!flg) break; 714 715 for (i=0; i<n;) { 716 /* Now identify the consecutive vals belonging to the same row */ 717 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 718 if (j < n) ncols = j-i; 719 else ncols = n-i; 720 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 721 i = j; 722 } 723 } 724 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 725 baij->roworiented = r1; 726 a->roworiented = r2; 727 b->roworiented = r3; 728 } 729 730 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 731 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 732 733 /* determine if any processor has disassembled, if so we must 734 also disassemble ourselfs, in order that we may reassemble. */ 735 /* 736 if nonzero structure of submatrix B cannot change then we know that 737 no processor disassembled thus we can skip this stuff 738 */ 739 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 740 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 741 if (mat->was_assembled && !other_disassembled) { 742 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 743 } 744 } 745 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 748 } 749 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 750 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 751 752 #if defined(PETSC_USE_BOPT_g) 753 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 754 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 755 baij->ht_total_ct = 0; 756 baij->ht_insert_ct = 0; 757 } 758 #endif 759 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 760 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 761 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 762 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 763 } 764 765 if (baij->rowvalues) { 766 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 767 baij->rowvalues = 0; 768 } 769 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 775 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 776 { 777 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 778 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 779 PetscTruth iascii,isdraw; 780 PetscViewer sviewer; 781 PetscViewerFormat format; 782 783 PetscFunctionBegin; 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 785 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 786 if (iascii) { 787 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 788 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 789 MatInfo info; 790 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 791 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 792 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 793 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 794 baij->bs,(int)info.memory);CHKERRQ(ierr); 795 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 796 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 797 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 798 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 799 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 800 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } else if (format == PETSC_VIEWER_ASCII_INFO) { 803 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 804 PetscFunctionReturn(0); 805 } 806 } 807 808 if (isdraw) { 809 PetscDraw draw; 810 PetscTruth isnull; 811 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 812 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 813 } 814 815 if (size == 1) { 816 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 817 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 818 } else { 819 /* assemble the entire matrix onto first processor. */ 820 Mat A; 821 Mat_SeqSBAIJ *Aloc; 822 Mat_SeqBAIJ *Bloc; 823 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 824 MatScalar *a; 825 826 /* Should this be the same type as mat? */ 827 if (!rank) { 828 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 829 } else { 830 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 831 } 832 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 833 ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 834 PetscLogObjectParent(mat,A); 835 836 /* copy over the A part */ 837 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 838 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 839 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 840 841 for (i=0; i<mbs; i++) { 842 rvals[0] = bs*(baij->rstart + i); 843 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 844 for (j=ai[i]; j<ai[i+1]; j++) { 845 col = (baij->cstart+aj[j])*bs; 846 for (k=0; k<bs; k++) { 847 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 848 col++; a += bs; 849 } 850 } 851 } 852 /* copy over the B part */ 853 Bloc = (Mat_SeqBAIJ*)baij->B->data; 854 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 855 for (i=0; i<mbs; i++) { 856 rvals[0] = bs*(baij->rstart + i); 857 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 858 for (j=ai[i]; j<ai[i+1]; j++) { 859 col = baij->garray[aj[j]]*bs; 860 for (k=0; k<bs; k++) { 861 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 862 col++; a += bs; 863 } 864 } 865 } 866 ierr = PetscFree(rvals);CHKERRQ(ierr); 867 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 868 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 869 /* 870 Everyone has to call to draw the matrix since the graphics waits are 871 synchronized across all processors that share the PetscDraw object 872 */ 873 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 874 if (!rank) { 875 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 876 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 877 } 878 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 879 ierr = MatDestroy(A);CHKERRQ(ierr); 880 } 881 PetscFunctionReturn(0); 882 } 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatView_MPISBAIJ" 886 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 887 { 888 int ierr; 889 PetscTruth iascii,isdraw,issocket,isbinary; 890 891 PetscFunctionBegin; 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 894 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 895 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 896 if (iascii || isdraw || issocket || isbinary) { 897 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 898 } else { 899 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 900 } 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatDestroy_MPISBAIJ" 906 int MatDestroy_MPISBAIJ(Mat mat) 907 { 908 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 909 int ierr; 910 911 PetscFunctionBegin; 912 #if defined(PETSC_USE_LOG) 913 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 914 #endif 915 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 916 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 917 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 918 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 919 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 920 #if defined (PETSC_USE_CTABLE) 921 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 922 #else 923 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 924 #endif 925 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 926 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 927 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 928 if (baij->slvec0) { 929 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 930 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 931 } 932 if (baij->slvec1) { 933 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 934 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 935 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 936 } 937 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 938 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 939 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 940 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 941 #if defined(PETSC_USE_MAT_SINGLE) 942 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 943 #endif 944 ierr = PetscFree(baij);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatMult_MPISBAIJ" 950 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 951 { 952 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 953 int ierr,nt,mbs=a->mbs,bs=a->bs; 954 PetscScalar *x,*from,zero=0.0; 955 956 PetscFunctionBegin; 957 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 958 if (nt != A->n) { 959 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 960 } 961 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 962 if (nt != A->m) { 963 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 964 } 965 966 /* diagonal part */ 967 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 968 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 969 970 /* subdiagonal part */ 971 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 972 973 /* copy x into the vec slvec0 */ 974 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 975 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 976 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 977 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 978 979 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 980 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 981 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 982 983 /* supperdiagonal part */ 984 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 985 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 991 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 int ierr,nt; 995 996 PetscFunctionBegin; 997 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 998 if (nt != A->n) { 999 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1000 } 1001 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1002 if (nt != A->m) { 1003 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1004 } 1005 1006 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1007 /* do diagonal part */ 1008 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1009 /* do supperdiagonal part */ 1010 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1011 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1012 /* do subdiagonal part */ 1013 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1014 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1016 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1022 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1023 { 1024 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1025 int ierr,mbs=a->mbs,bs=a->bs; 1026 PetscScalar *x,*from,zero=0.0; 1027 1028 PetscFunctionBegin; 1029 /* 1030 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1031 PetscSynchronizedFlush(A->comm); 1032 */ 1033 /* diagonal part */ 1034 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1035 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1036 1037 /* subdiagonal part */ 1038 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1039 1040 /* copy x into the vec slvec0 */ 1041 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1042 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1043 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1044 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1045 1046 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1048 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1049 1050 /* supperdiagonal part */ 1051 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1052 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1058 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1059 { 1060 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1061 int ierr; 1062 1063 PetscFunctionBegin; 1064 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1065 /* do diagonal part */ 1066 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1067 /* do supperdiagonal part */ 1068 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1069 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1070 1071 /* do subdiagonal part */ 1072 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1073 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1074 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1075 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1081 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1082 { 1083 int ierr; 1084 1085 PetscFunctionBegin; 1086 ierr = MatMult(A,xx,yy);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1092 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1093 { 1094 int ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 /* 1102 This only works correctly for square matrices where the subblock A->A is the 1103 diagonal block 1104 */ 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1107 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1108 { 1109 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1110 int ierr; 1111 1112 PetscFunctionBegin; 1113 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1114 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatScale_MPISBAIJ" 1120 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1121 { 1122 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1123 int ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1127 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1133 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1134 { 1135 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1136 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1137 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1138 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1139 int *cmap,*idx_p,cstart = mat->cstart; 1140 1141 PetscFunctionBegin; 1142 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1143 mat->getrowactive = PETSC_TRUE; 1144 1145 if (!mat->rowvalues && (idx || v)) { 1146 /* 1147 allocate enough space to hold information from the longest row. 1148 */ 1149 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1150 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1151 int max = 1,mbs = mat->mbs,tmp; 1152 for (i=0; i<mbs; i++) { 1153 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1154 if (max < tmp) { max = tmp; } 1155 } 1156 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1157 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1158 } 1159 1160 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1161 lrow = row - brstart; /* local row index */ 1162 1163 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1164 if (!v) {pvA = 0; pvB = 0;} 1165 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1166 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1167 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1168 nztot = nzA + nzB; 1169 1170 cmap = mat->garray; 1171 if (v || idx) { 1172 if (nztot) { 1173 /* Sort by increasing column numbers, assuming A and B already sorted */ 1174 int imark = -1; 1175 if (v) { 1176 *v = v_p = mat->rowvalues; 1177 for (i=0; i<nzB; i++) { 1178 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1179 else break; 1180 } 1181 imark = i; 1182 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1183 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1184 } 1185 if (idx) { 1186 *idx = idx_p = mat->rowindices; 1187 if (imark > -1) { 1188 for (i=0; i<imark; i++) { 1189 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1190 } 1191 } else { 1192 for (i=0; i<nzB; i++) { 1193 if (cmap[cworkB[i]/bs] < cstart) 1194 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1195 else break; 1196 } 1197 imark = i; 1198 } 1199 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1200 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1201 } 1202 } else { 1203 if (idx) *idx = 0; 1204 if (v) *v = 0; 1205 } 1206 } 1207 *nz = nztot; 1208 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1209 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1215 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1216 { 1217 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1218 1219 PetscFunctionBegin; 1220 if (baij->getrowactive == PETSC_FALSE) { 1221 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1222 } 1223 baij->getrowactive = PETSC_FALSE; 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1229 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1230 { 1231 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1232 1233 PetscFunctionBegin; 1234 *bs = baij->bs; 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1240 int MatZeroEntries_MPISBAIJ(Mat A) 1241 { 1242 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1243 int ierr; 1244 1245 PetscFunctionBegin; 1246 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1247 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1253 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1254 { 1255 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1256 Mat A = a->A,B = a->B; 1257 int ierr; 1258 PetscReal isend[5],irecv[5]; 1259 1260 PetscFunctionBegin; 1261 info->block_size = (PetscReal)a->bs; 1262 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1263 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1264 isend[3] = info->memory; isend[4] = info->mallocs; 1265 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1266 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1267 isend[3] += info->memory; isend[4] += info->mallocs; 1268 if (flag == MAT_LOCAL) { 1269 info->nz_used = isend[0]; 1270 info->nz_allocated = isend[1]; 1271 info->nz_unneeded = isend[2]; 1272 info->memory = isend[3]; 1273 info->mallocs = isend[4]; 1274 } else if (flag == MAT_GLOBAL_MAX) { 1275 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1276 info->nz_used = irecv[0]; 1277 info->nz_allocated = irecv[1]; 1278 info->nz_unneeded = irecv[2]; 1279 info->memory = irecv[3]; 1280 info->mallocs = irecv[4]; 1281 } else if (flag == MAT_GLOBAL_SUM) { 1282 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1283 info->nz_used = irecv[0]; 1284 info->nz_allocated = irecv[1]; 1285 info->nz_unneeded = irecv[2]; 1286 info->memory = irecv[3]; 1287 info->mallocs = irecv[4]; 1288 } else { 1289 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1290 } 1291 info->rows_global = (PetscReal)A->M; 1292 info->columns_global = (PetscReal)A->N; 1293 info->rows_local = (PetscReal)A->m; 1294 info->columns_local = (PetscReal)A->N; 1295 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1296 info->fill_ratio_needed = 0; 1297 info->factor_mallocs = 0; 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1303 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1304 { 1305 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1306 int ierr; 1307 1308 PetscFunctionBegin; 1309 switch (op) { 1310 case MAT_NO_NEW_NONZERO_LOCATIONS: 1311 case MAT_YES_NEW_NONZERO_LOCATIONS: 1312 case MAT_COLUMNS_UNSORTED: 1313 case MAT_COLUMNS_SORTED: 1314 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1315 case MAT_KEEP_ZEROED_ROWS: 1316 case MAT_NEW_NONZERO_LOCATION_ERR: 1317 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1318 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1319 break; 1320 case MAT_ROW_ORIENTED: 1321 a->roworiented = PETSC_TRUE; 1322 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1323 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1324 break; 1325 case MAT_ROWS_SORTED: 1326 case MAT_ROWS_UNSORTED: 1327 case MAT_YES_NEW_DIAGONALS: 1328 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1329 break; 1330 case MAT_COLUMN_ORIENTED: 1331 a->roworiented = PETSC_FALSE; 1332 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1333 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1334 break; 1335 case MAT_IGNORE_OFF_PROC_ENTRIES: 1336 a->donotstash = PETSC_TRUE; 1337 break; 1338 case MAT_NO_NEW_DIAGONALS: 1339 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1340 case MAT_USE_HASH_TABLE: 1341 a->ht_flag = PETSC_TRUE; 1342 break; 1343 case MAT_NOT_SYMMETRIC: 1344 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1345 case MAT_HERMITIAN: 1346 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 1347 case MAT_SYMMETRIC: 1348 case MAT_STRUCTURALLY_SYMMETRIC: 1349 case MAT_NOT_HERMITIAN: 1350 case MAT_SYMMETRY_ETERNAL: 1351 case MAT_NOT_SYMMETRY_ETERNAL: 1352 break; 1353 default: 1354 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1361 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1362 { 1363 int ierr; 1364 PetscFunctionBegin; 1365 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1371 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1372 { 1373 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1374 Mat a = baij->A,b = baij->B; 1375 int ierr,s1,s2,s3; 1376 1377 PetscFunctionBegin; 1378 if (ll != rr) { 1379 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1380 } 1381 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1382 if (rr) { 1383 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1384 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1385 /* Overlap communication with computation. */ 1386 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1387 /*} if (ll) { */ 1388 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1389 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1390 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1391 /* } */ 1392 /* scale the diagonal block */ 1393 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1394 1395 /* if (rr) { */ 1396 /* Do a scatter end and then right scale the off-diagonal block */ 1397 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1398 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1399 } 1400 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1406 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1407 { 1408 PetscFunctionBegin; 1409 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1414 int MatPrintHelp_MPISBAIJ(Mat A) 1415 { 1416 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1417 MPI_Comm comm = A->comm; 1418 static int called = 0; 1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 if (!a->rank) { 1423 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1424 } 1425 if (called) {PetscFunctionReturn(0);} else called = 1; 1426 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1427 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1433 int MatSetUnfactored_MPISBAIJ(Mat A) 1434 { 1435 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1436 int ierr; 1437 1438 PetscFunctionBegin; 1439 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 1443 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatEqual_MPISBAIJ" 1447 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1448 { 1449 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1450 Mat a,b,c,d; 1451 PetscTruth flg; 1452 int ierr; 1453 1454 PetscFunctionBegin; 1455 a = matA->A; b = matA->B; 1456 c = matB->A; d = matB->B; 1457 1458 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1459 if (flg == PETSC_TRUE) { 1460 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1461 } 1462 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1468 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1469 { 1470 int ierr; 1471 1472 PetscFunctionBegin; 1473 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1479 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1480 { 1481 int i,ierr; 1482 PetscTruth flg; 1483 1484 for (i=0; i<n; i++) { 1485 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1486 if (!flg) { 1487 SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1488 } 1489 } 1490 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 1495 /* -------------------------------------------------------------------*/ 1496 static struct _MatOps MatOps_Values = { 1497 MatSetValues_MPISBAIJ, 1498 MatGetRow_MPISBAIJ, 1499 MatRestoreRow_MPISBAIJ, 1500 MatMult_MPISBAIJ, 1501 /* 4*/ MatMultAdd_MPISBAIJ, 1502 MatMultTranspose_MPISBAIJ, 1503 MatMultTransposeAdd_MPISBAIJ, 1504 0, 1505 0, 1506 0, 1507 /*10*/ 0, 1508 0, 1509 0, 1510 MatRelax_MPISBAIJ, 1511 MatTranspose_MPISBAIJ, 1512 /*15*/ MatGetInfo_MPISBAIJ, 1513 MatEqual_MPISBAIJ, 1514 MatGetDiagonal_MPISBAIJ, 1515 MatDiagonalScale_MPISBAIJ, 1516 MatNorm_MPISBAIJ, 1517 /*20*/ MatAssemblyBegin_MPISBAIJ, 1518 MatAssemblyEnd_MPISBAIJ, 1519 0, 1520 MatSetOption_MPISBAIJ, 1521 MatZeroEntries_MPISBAIJ, 1522 /*25*/ MatZeroRows_MPISBAIJ, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*30*/ MatSetUpPreallocation_MPISBAIJ, 1528 0, 1529 0, 1530 0, 1531 0, 1532 /*35*/ MatDuplicate_MPISBAIJ, 1533 0, 1534 0, 1535 0, 1536 0, 1537 /*40*/ 0, 1538 MatGetSubMatrices_MPISBAIJ, 1539 MatIncreaseOverlap_MPISBAIJ, 1540 MatGetValues_MPISBAIJ, 1541 0, 1542 /*45*/ MatPrintHelp_MPISBAIJ, 1543 MatScale_MPISBAIJ, 1544 0, 1545 0, 1546 0, 1547 /*50*/ MatGetBlockSize_MPISBAIJ, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*55*/ 0, 1553 0, 1554 MatSetUnfactored_MPISBAIJ, 1555 0, 1556 MatSetValuesBlocked_MPISBAIJ, 1557 /*60*/ 0, 1558 0, 1559 0, 1560 MatGetPetscMaps_Petsc, 1561 0, 1562 /*65*/ 0, 1563 0, 1564 0, 1565 0, 1566 0, 1567 /*70*/ MatGetRowMax_MPISBAIJ, 1568 0, 1569 0, 1570 0, 1571 0, 1572 /*75*/ 0, 1573 0, 1574 0, 1575 0, 1576 0, 1577 /*80*/ 0, 1578 0, 1579 0, 1580 0, 1581 /*85*/ MatLoad_MPISBAIJ 1582 }; 1583 1584 1585 EXTERN_C_BEGIN 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1588 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1589 { 1590 PetscFunctionBegin; 1591 *a = ((Mat_MPISBAIJ *)A->data)->A; 1592 *iscopy = PETSC_FALSE; 1593 PetscFunctionReturn(0); 1594 } 1595 EXTERN_C_END 1596 1597 EXTERN_C_BEGIN 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1600 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1601 { 1602 Mat_MPISBAIJ *b; 1603 int ierr,i,mbs,Mbs; 1604 1605 PetscFunctionBegin; 1606 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1607 1608 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1609 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1610 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1611 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1612 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1613 if (d_nnz) { 1614 for (i=0; i<B->m/bs; i++) { 1615 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1616 } 1617 } 1618 if (o_nnz) { 1619 for (i=0; i<B->m/bs; i++) { 1620 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1621 } 1622 } 1623 B->preallocated = PETSC_TRUE; 1624 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1625 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1626 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1627 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1628 1629 b = (Mat_MPISBAIJ*)B->data; 1630 mbs = B->m/bs; 1631 Mbs = B->M/bs; 1632 if (mbs*bs != B->m) { 1633 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1634 } 1635 1636 b->bs = bs; 1637 b->bs2 = bs*bs; 1638 b->mbs = mbs; 1639 b->nbs = mbs; 1640 b->Mbs = Mbs; 1641 b->Nbs = Mbs; 1642 1643 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1644 b->rowners[0] = 0; 1645 for (i=2; i<=b->size; i++) { 1646 b->rowners[i] += b->rowners[i-1]; 1647 } 1648 b->rstart = b->rowners[b->rank]; 1649 b->rend = b->rowners[b->rank+1]; 1650 b->cstart = b->rstart; 1651 b->cend = b->rend; 1652 for (i=0; i<=b->size; i++) { 1653 b->rowners_bs[i] = b->rowners[i]*bs; 1654 } 1655 b->rstart_bs = b-> rstart*bs; 1656 b->rend_bs = b->rend*bs; 1657 1658 b->cstart_bs = b->cstart*bs; 1659 b->cend_bs = b->cend*bs; 1660 1661 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr); 1662 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1663 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1664 PetscLogObjectParent(B,b->A); 1665 1666 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr); 1667 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1668 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1669 PetscLogObjectParent(B,b->B); 1670 1671 /* build cache for off array entries formed */ 1672 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1673 1674 PetscFunctionReturn(0); 1675 } 1676 EXTERN_C_END 1677 1678 /*MC 1679 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1680 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1681 1682 Options Database Keys: 1683 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1684 1685 Level: beginner 1686 1687 .seealso: MatCreateMPISBAIJ 1688 M*/ 1689 1690 EXTERN_C_BEGIN 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatCreate_MPISBAIJ" 1693 int MatCreate_MPISBAIJ(Mat B) 1694 { 1695 Mat_MPISBAIJ *b; 1696 int ierr; 1697 PetscTruth flg; 1698 1699 PetscFunctionBegin; 1700 1701 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1702 B->data = (void*)b; 1703 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1704 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1705 1706 B->ops->destroy = MatDestroy_MPISBAIJ; 1707 B->ops->view = MatView_MPISBAIJ; 1708 B->mapping = 0; 1709 B->factor = 0; 1710 B->assembled = PETSC_FALSE; 1711 1712 B->insertmode = NOT_SET_VALUES; 1713 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1714 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1715 1716 /* build local table of row and column ownerships */ 1717 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1718 b->cowners = b->rowners + b->size + 2; 1719 b->rowners_bs = b->cowners + b->size + 2; 1720 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1721 1722 /* build cache for off array entries formed */ 1723 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1724 b->donotstash = PETSC_FALSE; 1725 b->colmap = PETSC_NULL; 1726 b->garray = PETSC_NULL; 1727 b->roworiented = PETSC_TRUE; 1728 1729 #if defined(PETSC_USE_MAT_SINGLE) 1730 /* stuff for MatSetValues_XXX in single precision */ 1731 b->setvalueslen = 0; 1732 b->setvaluescopy = PETSC_NULL; 1733 #endif 1734 1735 /* stuff used in block assembly */ 1736 b->barray = 0; 1737 1738 /* stuff used for matrix vector multiply */ 1739 b->lvec = 0; 1740 b->Mvctx = 0; 1741 b->slvec0 = 0; 1742 b->slvec0b = 0; 1743 b->slvec1 = 0; 1744 b->slvec1a = 0; 1745 b->slvec1b = 0; 1746 b->sMvctx = 0; 1747 1748 /* stuff for MatGetRow() */ 1749 b->rowindices = 0; 1750 b->rowvalues = 0; 1751 b->getrowactive = PETSC_FALSE; 1752 1753 /* hash table stuff */ 1754 b->ht = 0; 1755 b->hd = 0; 1756 b->ht_size = 0; 1757 b->ht_flag = PETSC_FALSE; 1758 b->ht_fact = 0; 1759 b->ht_total_ct = 0; 1760 b->ht_insert_ct = 0; 1761 1762 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1763 if (flg) { 1764 PetscReal fact = 1.39; 1765 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1766 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1767 if (fact <= 1.0) fact = 1.39; 1768 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1769 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1770 } 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1772 "MatStoreValues_MPISBAIJ", 1773 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1775 "MatRetrieveValues_MPISBAIJ", 1776 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1777 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1778 "MatGetDiagonalBlock_MPISBAIJ", 1779 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1780 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1781 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1782 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1783 B->symmetric = PETSC_TRUE; 1784 B->structurally_symmetric = PETSC_TRUE; 1785 B->symmetric_set = PETSC_TRUE; 1786 B->structurally_symmetric_set = PETSC_TRUE; 1787 PetscFunctionReturn(0); 1788 } 1789 EXTERN_C_END 1790 1791 /*MC 1792 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1793 1794 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1795 and MATMPISBAIJ otherwise. 1796 1797 Options Database Keys: 1798 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1799 1800 Level: beginner 1801 1802 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1803 M*/ 1804 1805 EXTERN_C_BEGIN 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatCreate_SBAIJ" 1808 int MatCreate_SBAIJ(Mat A) { 1809 int ierr,size; 1810 1811 PetscFunctionBegin; 1812 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1813 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1814 if (size == 1) { 1815 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1816 } else { 1817 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1818 } 1819 PetscFunctionReturn(0); 1820 } 1821 EXTERN_C_END 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1825 /*@C 1826 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1827 the user should preallocate the matrix storage by setting the parameters 1828 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1829 performance can be increased by more than a factor of 50. 1830 1831 Collective on Mat 1832 1833 Input Parameters: 1834 + A - the matrix 1835 . bs - size of blockk 1836 . d_nz - number of block nonzeros per block row in diagonal portion of local 1837 submatrix (same for all local rows) 1838 . d_nnz - array containing the number of block nonzeros in the various block rows 1839 in the upper triangular and diagonal part of the in diagonal portion of the local 1840 (possibly different for each block row) or PETSC_NULL. You must leave room 1841 for the diagonal entry even if it is zero. 1842 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1843 submatrix (same for all local rows). 1844 - o_nnz - array containing the number of nonzeros in the various block rows of the 1845 off-diagonal portion of the local submatrix (possibly different for 1846 each block row) or PETSC_NULL. 1847 1848 1849 Options Database Keys: 1850 . -mat_no_unroll - uses code that does not unroll the loops in the 1851 block calculations (much slower) 1852 . -mat_block_size - size of the blocks to use 1853 1854 Notes: 1855 1856 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1857 than it must be used on all processors that share the object for that argument. 1858 1859 Storage Information: 1860 For a square global matrix we define each processor's diagonal portion 1861 to be its local rows and the corresponding columns (a square submatrix); 1862 each processor's off-diagonal portion encompasses the remainder of the 1863 local matrix (a rectangular submatrix). 1864 1865 The user can specify preallocated storage for the diagonal part of 1866 the local submatrix with either d_nz or d_nnz (not both). Set 1867 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1868 memory allocation. Likewise, specify preallocated storage for the 1869 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1870 1871 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1872 the figure below we depict these three local rows and all columns (0-11). 1873 1874 .vb 1875 0 1 2 3 4 5 6 7 8 9 10 11 1876 ------------------- 1877 row 3 | o o o d d d o o o o o o 1878 row 4 | o o o d d d o o o o o o 1879 row 5 | o o o d d d o o o o o o 1880 ------------------- 1881 .ve 1882 1883 Thus, any entries in the d locations are stored in the d (diagonal) 1884 submatrix, and any entries in the o locations are stored in the 1885 o (off-diagonal) submatrix. Note that the d matrix is stored in 1886 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1887 1888 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1889 plus the diagonal part of the d matrix, 1890 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1891 In general, for PDE problems in which most nonzeros are near the diagonal, 1892 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1893 or you will get TERRIBLE performance; see the users' manual chapter on 1894 matrices. 1895 1896 Level: intermediate 1897 1898 .keywords: matrix, block, aij, compressed row, sparse, parallel 1899 1900 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1901 @*/ 1902 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1903 { 1904 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1905 1906 PetscFunctionBegin; 1907 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1908 if (f) { 1909 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatCreateMPISBAIJ" 1916 /*@C 1917 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1918 (block compressed row). For good matrix assembly performance 1919 the user should preallocate the matrix storage by setting the parameters 1920 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1921 performance can be increased by more than a factor of 50. 1922 1923 Collective on MPI_Comm 1924 1925 Input Parameters: 1926 + comm - MPI communicator 1927 . bs - size of blockk 1928 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1929 This value should be the same as the local size used in creating the 1930 y vector for the matrix-vector product y = Ax. 1931 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1932 This value should be the same as the local size used in creating the 1933 x vector for the matrix-vector product y = Ax. 1934 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1935 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1936 . d_nz - number of block nonzeros per block row in diagonal portion of local 1937 submatrix (same for all local rows) 1938 . d_nnz - array containing the number of block nonzeros in the various block rows 1939 in the upper triangular portion of the in diagonal portion of the local 1940 (possibly different for each block block row) or PETSC_NULL. 1941 You must leave room for the diagonal entry even if it is zero. 1942 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1943 submatrix (same for all local rows). 1944 - o_nnz - array containing the number of nonzeros in the various block rows of the 1945 off-diagonal portion of the local submatrix (possibly different for 1946 each block row) or PETSC_NULL. 1947 1948 Output Parameter: 1949 . A - the matrix 1950 1951 Options Database Keys: 1952 . -mat_no_unroll - uses code that does not unroll the loops in the 1953 block calculations (much slower) 1954 . -mat_block_size - size of the blocks to use 1955 . -mat_mpi - use the parallel matrix data structures even on one processor 1956 (defaults to using SeqBAIJ format on one processor) 1957 1958 Notes: 1959 The user MUST specify either the local or global matrix dimensions 1960 (possibly both). 1961 1962 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1963 than it must be used on all processors that share the object for that argument. 1964 1965 Storage Information: 1966 For a square global matrix we define each processor's diagonal portion 1967 to be its local rows and the corresponding columns (a square submatrix); 1968 each processor's off-diagonal portion encompasses the remainder of the 1969 local matrix (a rectangular submatrix). 1970 1971 The user can specify preallocated storage for the diagonal part of 1972 the local submatrix with either d_nz or d_nnz (not both). Set 1973 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1974 memory allocation. Likewise, specify preallocated storage for the 1975 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1976 1977 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1978 the figure below we depict these three local rows and all columns (0-11). 1979 1980 .vb 1981 0 1 2 3 4 5 6 7 8 9 10 11 1982 ------------------- 1983 row 3 | o o o d d d o o o o o o 1984 row 4 | o o o d d d o o o o o o 1985 row 5 | o o o d d d o o o o o o 1986 ------------------- 1987 .ve 1988 1989 Thus, any entries in the d locations are stored in the d (diagonal) 1990 submatrix, and any entries in the o locations are stored in the 1991 o (off-diagonal) submatrix. Note that the d matrix is stored in 1992 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1993 1994 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1995 plus the diagonal part of the d matrix, 1996 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1997 In general, for PDE problems in which most nonzeros are near the diagonal, 1998 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1999 or you will get TERRIBLE performance; see the users' manual chapter on 2000 matrices. 2001 2002 Level: intermediate 2003 2004 .keywords: matrix, block, aij, compressed row, sparse, parallel 2005 2006 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2007 @*/ 2008 2009 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2010 { 2011 int ierr,size; 2012 2013 PetscFunctionBegin; 2014 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2015 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2016 if (size > 1) { 2017 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2018 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2019 } else { 2020 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2021 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2029 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2030 { 2031 Mat mat; 2032 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2033 int ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs; 2034 PetscScalar *array; 2035 2036 PetscFunctionBegin; 2037 *newmat = 0; 2038 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2039 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2040 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2041 2042 mat->factor = matin->factor; 2043 mat->preallocated = PETSC_TRUE; 2044 mat->assembled = PETSC_TRUE; 2045 mat->insertmode = NOT_SET_VALUES; 2046 2047 a = (Mat_MPISBAIJ*)mat->data; 2048 a->bs = oldmat->bs; 2049 a->bs2 = oldmat->bs2; 2050 a->mbs = oldmat->mbs; 2051 a->nbs = oldmat->nbs; 2052 a->Mbs = oldmat->Mbs; 2053 a->Nbs = oldmat->Nbs; 2054 2055 a->rstart = oldmat->rstart; 2056 a->rend = oldmat->rend; 2057 a->cstart = oldmat->cstart; 2058 a->cend = oldmat->cend; 2059 a->size = oldmat->size; 2060 a->rank = oldmat->rank; 2061 a->donotstash = oldmat->donotstash; 2062 a->roworiented = oldmat->roworiented; 2063 a->rowindices = 0; 2064 a->rowvalues = 0; 2065 a->getrowactive = PETSC_FALSE; 2066 a->barray = 0; 2067 a->rstart_bs = oldmat->rstart_bs; 2068 a->rend_bs = oldmat->rend_bs; 2069 a->cstart_bs = oldmat->cstart_bs; 2070 a->cend_bs = oldmat->cend_bs; 2071 2072 /* hash table stuff */ 2073 a->ht = 0; 2074 a->hd = 0; 2075 a->ht_size = 0; 2076 a->ht_flag = oldmat->ht_flag; 2077 a->ht_fact = oldmat->ht_fact; 2078 a->ht_total_ct = 0; 2079 a->ht_insert_ct = 0; 2080 2081 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2082 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2083 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2084 if (oldmat->colmap) { 2085 #if defined (PETSC_USE_CTABLE) 2086 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2087 #else 2088 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2089 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2090 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2091 #endif 2092 } else a->colmap = 0; 2093 2094 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2095 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2096 PetscLogObjectMemory(mat,len*sizeof(int)); 2097 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2098 } else a->garray = 0; 2099 2100 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2101 PetscLogObjectParent(mat,a->lvec); 2102 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2103 PetscLogObjectParent(mat,a->Mvctx); 2104 2105 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2106 PetscLogObjectParent(mat,a->slvec0); 2107 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2108 PetscLogObjectParent(mat,a->slvec1); 2109 2110 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2111 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2112 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2113 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2114 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2115 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2116 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2117 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2118 PetscLogObjectParent(mat,a->slvec0); 2119 PetscLogObjectParent(mat,a->slvec1); 2120 PetscLogObjectParent(mat,a->slvec0b); 2121 PetscLogObjectParent(mat,a->slvec1a); 2122 PetscLogObjectParent(mat,a->slvec1b); 2123 2124 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2125 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2126 a->sMvctx = oldmat->sMvctx; 2127 PetscLogObjectParent(mat,a->sMvctx); 2128 2129 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2130 PetscLogObjectParent(mat,a->A); 2131 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2132 PetscLogObjectParent(mat,a->B); 2133 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2134 *newmat = mat; 2135 PetscFunctionReturn(0); 2136 } 2137 2138 #include "petscsys.h" 2139 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatLoad_MPISBAIJ" 2142 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2143 { 2144 Mat A; 2145 int i,nz,ierr,j,rstart,rend,fd; 2146 PetscScalar *vals,*buf; 2147 MPI_Comm comm = ((PetscObject)viewer)->comm; 2148 MPI_Status status; 2149 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2150 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2151 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2152 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2153 int dcount,kmax,k,nzcount,tmp; 2154 2155 PetscFunctionBegin; 2156 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2157 2158 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2159 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2160 if (!rank) { 2161 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2162 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2163 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2164 if (header[3] < 0) { 2165 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2166 } 2167 } 2168 2169 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2170 M = header[1]; N = header[2]; 2171 2172 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2173 2174 /* 2175 This code adds extra rows to make sure the number of rows is 2176 divisible by the blocksize 2177 */ 2178 Mbs = M/bs; 2179 extra_rows = bs - M + bs*(Mbs); 2180 if (extra_rows == bs) extra_rows = 0; 2181 else Mbs++; 2182 if (extra_rows &&!rank) { 2183 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2184 } 2185 2186 /* determine ownership of all rows */ 2187 mbs = Mbs/size + ((Mbs % size) > rank); 2188 m = mbs*bs; 2189 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2190 browners = rowners + size + 1; 2191 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2192 rowners[0] = 0; 2193 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2194 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2195 rstart = rowners[rank]; 2196 rend = rowners[rank+1]; 2197 2198 /* distribute row lengths to all processors */ 2199 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2200 if (!rank) { 2201 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2202 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2203 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2204 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2205 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2206 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2207 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2208 } else { 2209 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2210 } 2211 2212 if (!rank) { /* procs[0] */ 2213 /* calculate the number of nonzeros on each processor */ 2214 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2215 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2216 for (i=0; i<size; i++) { 2217 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2218 procsnz[i] += rowlengths[j]; 2219 } 2220 } 2221 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2222 2223 /* determine max buffer needed and allocate it */ 2224 maxnz = 0; 2225 for (i=0; i<size; i++) { 2226 maxnz = PetscMax(maxnz,procsnz[i]); 2227 } 2228 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2229 2230 /* read in my part of the matrix column indices */ 2231 nz = procsnz[0]; 2232 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2233 mycols = ibuf; 2234 if (size == 1) nz -= extra_rows; 2235 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2236 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2237 2238 /* read in every ones (except the last) and ship off */ 2239 for (i=1; i<size-1; i++) { 2240 nz = procsnz[i]; 2241 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2242 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2243 } 2244 /* read in the stuff for the last proc */ 2245 if (size != 1) { 2246 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2247 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2248 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2249 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2250 } 2251 ierr = PetscFree(cols);CHKERRQ(ierr); 2252 } else { /* procs[i], i>0 */ 2253 /* determine buffer space needed for message */ 2254 nz = 0; 2255 for (i=0; i<m; i++) { 2256 nz += locrowlens[i]; 2257 } 2258 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2259 mycols = ibuf; 2260 /* receive message of column indices*/ 2261 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2262 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2263 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2264 } 2265 2266 /* loop over local rows, determining number of off diagonal entries */ 2267 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2268 odlens = dlens + (rend-rstart); 2269 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2270 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2271 masked1 = mask + Mbs; 2272 masked2 = masked1 + Mbs; 2273 rowcount = 0; nzcount = 0; 2274 for (i=0; i<mbs; i++) { 2275 dcount = 0; 2276 odcount = 0; 2277 for (j=0; j<bs; j++) { 2278 kmax = locrowlens[rowcount]; 2279 for (k=0; k<kmax; k++) { 2280 tmp = mycols[nzcount++]/bs; /* block col. index */ 2281 if (!mask[tmp]) { 2282 mask[tmp] = 1; 2283 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2284 else masked1[dcount++] = tmp; /* entry in diag portion */ 2285 } 2286 } 2287 rowcount++; 2288 } 2289 2290 dlens[i] = dcount; /* d_nzz[i] */ 2291 odlens[i] = odcount; /* o_nzz[i] */ 2292 2293 /* zero out the mask elements we set */ 2294 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2295 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2296 } 2297 2298 /* create our matrix */ 2299 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2300 ierr = MatSetType(A,type);CHKERRQ(ierr); 2301 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2302 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2303 2304 if (!rank) { 2305 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2306 /* read in my part of the matrix numerical values */ 2307 nz = procsnz[0]; 2308 vals = buf; 2309 mycols = ibuf; 2310 if (size == 1) nz -= extra_rows; 2311 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2312 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2313 2314 /* insert into matrix */ 2315 jj = rstart*bs; 2316 for (i=0; i<m; i++) { 2317 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2318 mycols += locrowlens[i]; 2319 vals += locrowlens[i]; 2320 jj++; 2321 } 2322 2323 /* read in other processors (except the last one) and ship out */ 2324 for (i=1; i<size-1; i++) { 2325 nz = procsnz[i]; 2326 vals = buf; 2327 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2328 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2329 } 2330 /* the last proc */ 2331 if (size != 1){ 2332 nz = procsnz[i] - extra_rows; 2333 vals = buf; 2334 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2335 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2336 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2337 } 2338 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2339 2340 } else { 2341 /* receive numeric values */ 2342 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2343 2344 /* receive message of values*/ 2345 vals = buf; 2346 mycols = ibuf; 2347 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2348 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2349 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2350 2351 /* insert into matrix */ 2352 jj = rstart*bs; 2353 for (i=0; i<m; i++) { 2354 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2355 mycols += locrowlens[i]; 2356 vals += locrowlens[i]; 2357 jj++; 2358 } 2359 } 2360 2361 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2362 ierr = PetscFree(buf);CHKERRQ(ierr); 2363 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2364 ierr = PetscFree(rowners);CHKERRQ(ierr); 2365 ierr = PetscFree(dlens);CHKERRQ(ierr); 2366 ierr = PetscFree(mask);CHKERRQ(ierr); 2367 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 *newmat = A; 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2375 /*@ 2376 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2377 2378 Input Parameters: 2379 . mat - the matrix 2380 . fact - factor 2381 2382 Collective on Mat 2383 2384 Level: advanced 2385 2386 Notes: 2387 This can also be set by the command line option: -mat_use_hash_table fact 2388 2389 .keywords: matrix, hashtable, factor, HT 2390 2391 .seealso: MatSetOption() 2392 @*/ 2393 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2394 { 2395 PetscFunctionBegin; 2396 SETERRQ(1,"Function not yet written for SBAIJ format"); 2397 /* PetscFunctionReturn(0); */ 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2402 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2403 { 2404 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2405 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2406 PetscReal atmp; 2407 PetscReal *work,*svalues,*rvalues; 2408 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2409 int rank,size,*rowners_bs,dest,count,source; 2410 PetscScalar *va; 2411 MatScalar *ba; 2412 MPI_Status stat; 2413 2414 PetscFunctionBegin; 2415 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2416 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2417 2418 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2419 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2420 2421 bs = a->bs; 2422 mbs = a->mbs; 2423 Mbs = a->Mbs; 2424 ba = b->a; 2425 bi = b->i; 2426 bj = b->j; 2427 /* 2428 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2429 PetscSynchronizedFlush(A->comm); 2430 */ 2431 2432 /* find ownerships */ 2433 rowners_bs = a->rowners_bs; 2434 /* 2435 if (!rank){ 2436 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2437 } 2438 */ 2439 2440 /* each proc creates an array to be distributed */ 2441 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2442 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2443 2444 /* row_max for B */ 2445 if (rank != size-1){ 2446 for (i=0; i<mbs; i++) { 2447 ncols = bi[1] - bi[0]; bi++; 2448 brow = bs*i; 2449 for (j=0; j<ncols; j++){ 2450 bcol = bs*(*bj); 2451 for (kcol=0; kcol<bs; kcol++){ 2452 col = bcol + kcol; /* local col index */ 2453 col += rowners_bs[rank+1]; /* global col index */ 2454 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2455 for (krow=0; krow<bs; krow++){ 2456 atmp = PetscAbsScalar(*ba); ba++; 2457 row = brow + krow; /* local row index */ 2458 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2459 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2460 if (work[col] < atmp) work[col] = atmp; 2461 } 2462 } 2463 bj++; 2464 } 2465 } 2466 /* 2467 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2468 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2469 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2470 */ 2471 2472 /* send values to its owners */ 2473 for (dest=rank+1; dest<size; dest++){ 2474 svalues = work + rowners_bs[dest]; 2475 count = rowners_bs[dest+1]-rowners_bs[dest]; 2476 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2477 /* 2478 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2479 PetscSynchronizedFlush(A->comm); 2480 */ 2481 } 2482 } 2483 2484 /* receive values */ 2485 if (rank){ 2486 rvalues = work; 2487 count = rowners_bs[rank+1]-rowners_bs[rank]; 2488 for (source=0; source<rank; source++){ 2489 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2490 /* process values */ 2491 for (i=0; i<count; i++){ 2492 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2493 } 2494 /* 2495 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2496 PetscSynchronizedFlush(A->comm); 2497 */ 2498 } 2499 } 2500 2501 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2502 ierr = PetscFree(work);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatRelax_MPISBAIJ" 2508 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2509 { 2510 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2511 int ierr,mbs=mat->mbs,bs=mat->bs; 2512 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2513 Vec bb1; 2514 2515 PetscFunctionBegin; 2516 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2517 if (bs > 1) 2518 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2519 2520 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2521 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2522 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2523 its--; 2524 } 2525 2526 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2527 while (its--){ 2528 2529 /* lower triangular part: slvec0b = - B^T*xx */ 2530 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2531 2532 /* copy xx into slvec0a */ 2533 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2534 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2535 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2536 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2537 2538 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2539 2540 /* copy bb into slvec1a */ 2541 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2542 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2543 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2544 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2545 2546 /* set slvec1b = 0 */ 2547 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2548 2549 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2550 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2552 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2553 2554 /* upper triangular part: bb1 = bb1 - B*x */ 2555 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2556 2557 /* local diagonal sweep */ 2558 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2559 } 2560 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2561 } else { 2562 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2563 } 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2569 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2570 { 2571 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2572 int ierr; 2573 PetscScalar mone=-1.0; 2574 Vec lvec1,bb1; 2575 2576 PetscFunctionBegin; 2577 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2578 if (mat->bs > 1) 2579 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2580 2581 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2582 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2583 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2584 its--; 2585 } 2586 2587 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2588 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2589 while (its--){ 2590 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2591 2592 /* lower diagonal part: bb1 = bb - B^T*xx */ 2593 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2594 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2595 2596 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2597 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2598 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2599 2600 /* upper diagonal part: bb1 = bb1 - B*x */ 2601 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2602 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2603 2604 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2605 2606 /* diagonal sweep */ 2607 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2608 } 2609 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2610 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2611 } else { 2612 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2613 } 2614 PetscFunctionReturn(0); 2615 } 2616 2617