1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatPrintHelp_SeqSBAIJ(Mat); 17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 24 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 25 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 26 converts the entries into single precision and then calls ..._MatScalar() to put them 27 into the single precision data structures. 
*/
#if defined(PETSC_USE_MAT_SINGLE)
/* Matrix entries are stored in single precision (MatScalar != PetscScalar):
   the ..._MatScalar() variants take entries already converted to MatScalar. */
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
#else
/* MatScalar == PetscScalar: the ..._MatScalar() names simply alias the plain versions */
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
/* Save a copy of the numerical values of both local blocks (diagonal part A
   and off-diagonal part B) so they can later be restored with
   MatRetrieveValues(); delegates to the sequential MatStoreValues(). */
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
/* Restore the numerical values of both local blocks previously saved with
   MatStoreValues(); delegates to the sequential MatRetrieveValues(). */
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.
This is done in a non scable way since the 77 length of colmap equals the global matrix length. 78 */ 79 #undef __FUNCT__ 80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len 
= (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if 
(rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = 
bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 } 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 271 { 272 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 280 b->setvalueslen = N; 281 } 282 vsingle = 
b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i]; /* narrowing conversion PetscScalar -> MatScalar */
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
/* Hash-table based insertion path: not implemented for the SBAIJ format,
   always raises an error. */
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
/* Hash-table based blocked insertion path: not implemented for the SBAIJ
   format, always raises an error. */
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 355 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 
MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1; 386 #else 387 col = baij->colmap[in[j]/bs] - 1; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } else col += in[j]%bs; 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 431 int 
MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 const MatScalar *value; 435 MatScalar *barray=baij->barray; 436 PetscTruth roworiented = baij->roworiented; 437 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 438 int rend=baij->rend,cstart=baij->cstart,stepval; 439 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 440 441 PetscFunctionBegin; 442 if(!barray) { 443 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 444 baij->barray = barray; 445 } 446 447 if (roworiented) { 448 stepval = (n-1)*bs; 449 } else { 450 stepval = (m-1)*bs; 451 } 452 for (i=0; i<m; i++) { 453 if (im[i] < 0) continue; 454 #if defined(PETSC_USE_BOPT_g) 455 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 456 #endif 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 for (j=0; j<n; j++) { 460 /* If NumCol = 1 then a copy is not required */ 461 if ((roworiented) && (n == 1)) { 462 barray = (MatScalar*) v + i*bs2; 463 } else if((!roworiented) && (m == 1)) { 464 barray = (MatScalar*) v + j*bs2; 465 } else { /* Here a copy is required */ 466 if (roworiented) { 467 value = v + i*(stepval+bs)*bs + j*bs; 468 } else { 469 value = v + j*(stepval+bs)*bs + i*bs; 470 } 471 for (ii=0; ii<bs; ii++,value+=stepval) { 472 for (jj=0; jj<bs; jj++) { 473 *barray++ = *value++; 474 } 475 } 476 barray -=bs2; 477 } 478 479 if (in[j] >= cstart && in[j] < cend){ 480 col = in[j] - cstart; 481 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 482 } 483 else if (in[j] < 0) continue; 484 #if defined(PETSC_USE_BOPT_g) 485 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 486 #endif 487 else { 488 if (mat->was_assembled) { 489 if (!baij->colmap) { 490 ierr = 
CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 491 } 492 493 #if defined(PETSC_USE_BOPT_g) 494 #if defined (PETSC_USE_CTABLE) 495 { int data; 496 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 497 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 498 } 499 #else 500 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 501 #endif 502 #endif 503 #if defined (PETSC_USE_CTABLE) 504 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 505 col = (col - 1)/bs; 506 #else 507 col = (baij->colmap[in[j]] - 1)/bs; 508 #endif 509 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 510 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 511 col = in[j]; 512 } 513 } 514 else col = in[j]; 515 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 } 518 } else { 519 if (!baij->donotstash) { 520 if (roworiented) { 521 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 522 } else { 523 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 524 } 525 } 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 #define HASH_KEY 0.6180339887 532 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 533 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 534 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 537 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 538 { 539 PetscFunctionBegin; 540 SETERRQ(1,"Function not yet written for SBAIJ format"); 541 /* PetscFunctionReturn(0); */ 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 546 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar 
v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/*
   Fetch entries (idxm[i],idxn[j]) into v, row-major (v[i*n+j]).  Only rows
   owned by this process are supported; a requested column is served either
   from the diagonal block A or, via the colmap lookup, from the off-diagonal
   block B.  Entries whose block is not stored locally come back as 0.0.
*/
int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* ownership range in scalar (not block) indices */
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column falls in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --; /* colmap stores index+1 so 0 can mean "absent" */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not present in the local off-diagonal part */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* local column within B */
            ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal
*norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],lnorm2[2];

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* uniprocessor: the A block holds the whole matrix */
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* local squared Frobenius norms of the diagonal (A) and off-diagonal (B)
         parts; a stack array replaces the previous PetscMalloc + lnorm2++/--
         pointer juggling, and sum (not &sum, which has type PetscReal(*)[2])
         is passed to MPI_Allreduce */
      ierr = MatNorm(baij->A,type,&lnorm2[0]);CHKERRQ(ierr);
      lnorm2[0] = lnorm2[0]*lnorm2[0];   /* square of norm(A) */
      ierr = MatNorm(baij->B,type,&lnorm2[1]);CHKERRQ(ierr);
      lnorm2[1] = lnorm2[1]*lnorm2[1];   /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /* B entries are stored once but represent symmetric pairs: count twice */
      *norm = sqrt(sum[0] + 2*sum[1]);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
641 */ 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 644 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 645 { 646 PetscFunctionBegin; 647 SETERRQ(1,"Function not yet written for SBAIJ format"); 648 /* PetscFunctionReturn(0); */ 649 } 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 653 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 654 { 655 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 656 int ierr,nstash,reallocs; 657 InsertMode addv; 658 659 PetscFunctionBegin; 660 if (baij->donotstash) { 661 PetscFunctionReturn(0); 662 } 663 664 /* make sure all processors are either in INSERTMODE or ADDMODE */ 665 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 666 if (addv == (ADD_VALUES|INSERT_VALUES)) { 667 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 668 } 669 mat->insertmode = addv; /* in case this processor had no cache */ 670 671 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 672 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 673 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 674 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 675 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 676 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 682 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 685 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 686 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 687 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 688 int *row,*col,other_disassembled; 689 PetscTruth r1,r2,r3; 690 MatScalar *val; 
691 InsertMode addv = mat->insertmode; 692 693 PetscFunctionBegin; 694 695 if (!baij->donotstash) { 696 while (1) { 697 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 698 /* 699 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 700 PetscSynchronizedFlush(mat->comm); 701 */ 702 if (!flg) break; 703 704 for (i=0; i<n;) { 705 /* Now identify the consecutive vals belonging to the same row */ 706 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 707 if (j < n) ncols = j-i; 708 else ncols = n-i; 709 /* Now assemble all these values with a single function call */ 710 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 711 i = j; 712 } 713 } 714 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 715 /* Now process the block-stash. Since the values are stashed column-oriented, 716 set the roworiented flag to column oriented, and after MatSetValues() 717 restore the original flags */ 718 r1 = baij->roworiented; 719 r2 = a->roworiented; 720 r3 = b->roworiented; 721 baij->roworiented = PETSC_FALSE; 722 a->roworiented = PETSC_FALSE; 723 b->roworiented = PETSC_FALSE; 724 while (1) { 725 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 726 if (!flg) break; 727 728 for (i=0; i<n;) { 729 /* Now identify the consecutive vals belonging to the same row */ 730 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 731 if (j < n) ncols = j-i; 732 else ncols = n-i; 733 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 734 i = j; 735 } 736 } 737 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 738 baij->roworiented = r1; 739 a->roworiented = r2; 740 b->roworiented = r3; 741 } 742 743 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 744 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 745 746 /* determine if any processor has 
disassembled, if so we must 747 also disassemble ourselfs, in order that we may reassemble. */ 748 /* 749 if nonzero structure of submatrix B cannot change then we know that 750 no processor disassembled thus we can skip this stuff 751 */ 752 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 753 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 754 if (mat->was_assembled && !other_disassembled) { 755 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 756 } 757 } 758 759 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 760 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 761 } 762 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 763 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 764 765 #if defined(PETSC_USE_BOPT_g) 766 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 767 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 768 baij->ht_total_ct = 0; 769 baij->ht_insert_ct = 0; 770 } 771 #endif 772 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 773 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 774 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 775 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 776 } 777 778 if (baij->rowvalues) { 779 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 780 baij->rowvalues = 0; 781 } 782 783 PetscFunctionReturn(0); 784 } 785 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 788 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 789 { 790 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 791 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 792 PetscTruth isascii,isdraw; 793 PetscViewer sviewer; 794 PetscViewerFormat format; 795 796 PetscFunctionBegin; 797 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 798 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 799 if (isascii) { 800 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 801 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 802 MatInfo info; 803 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 804 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 805 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 806 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 807 baij->bs,(int)info.memory);CHKERRQ(ierr); 808 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 809 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 810 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 811 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 812 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 813 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } else if (format == PETSC_VIEWER_ASCII_INFO) { 816 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 817 PetscFunctionReturn(0); 818 } 819 } 820 821 if (isdraw) { 822 PetscDraw draw; 823 PetscTruth isnull; 824 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 825 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 826 } 827 828 if (size == 1) { 829 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 830 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 831 } else { 832 /* assemble the entire matrix onto first processor. 
*/ 833 Mat A; 834 Mat_SeqSBAIJ *Aloc; 835 Mat_SeqBAIJ *Bloc; 836 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 837 MatScalar *a; 838 839 if (!rank) { 840 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 841 } else { 842 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 843 } 844 PetscLogObjectParent(mat,A); 845 846 /* copy over the A part */ 847 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 848 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 849 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 850 851 for (i=0; i<mbs; i++) { 852 rvals[0] = bs*(baij->rstart + i); 853 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 854 for (j=ai[i]; j<ai[i+1]; j++) { 855 col = (baij->cstart+aj[j])*bs; 856 for (k=0; k<bs; k++) { 857 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 858 col++; a += bs; 859 } 860 } 861 } 862 /* copy over the B part */ 863 Bloc = (Mat_SeqBAIJ*)baij->B->data; 864 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 865 for (i=0; i<mbs; i++) { 866 rvals[0] = bs*(baij->rstart + i); 867 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 868 for (j=ai[i]; j<ai[i+1]; j++) { 869 col = baij->garray[aj[j]]*bs; 870 for (k=0; k<bs; k++) { 871 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 872 col++; a += bs; 873 } 874 } 875 } 876 ierr = PetscFree(rvals);CHKERRQ(ierr); 877 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 878 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 879 /* 880 Everyone has to call to draw the matrix since the graphics waits are 881 synchronized across all processors that share the PetscDraw object 882 */ 883 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 884 if (!rank) { 885 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 886 ierr = 
MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 887 } 888 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 889 ierr = MatDestroy(A);CHKERRQ(ierr); 890 } 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatView_MPISBAIJ" 896 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 897 { 898 int ierr; 899 PetscTruth isascii,isdraw,issocket,isbinary; 900 901 PetscFunctionBegin; 902 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 904 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 905 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 906 if (isascii || isdraw || issocket || isbinary) { 907 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 908 } else { 909 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 910 } 911 PetscFunctionReturn(0); 912 } 913 914 #undef __FUNCT__ 915 #define __FUNCT__ "MatDestroy_MPISBAIJ" 916 int MatDestroy_MPISBAIJ(Mat mat) 917 { 918 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 919 int ierr; 920 921 PetscFunctionBegin; 922 #if defined(PETSC_USE_LOG) 923 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 924 #endif 925 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 926 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 927 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 928 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 929 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 930 #if defined (PETSC_USE_CTABLE) 931 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 932 #else 933 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 934 #endif 935 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 936 if (baij->lvec) {ierr = 
VecDestroy(baij->lvec);CHKERRQ(ierr);} 937 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 938 if (baij->slvec0) { 939 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 940 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 941 } 942 if (baij->slvec1) { 943 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 944 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 945 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 946 } 947 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 948 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 949 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 950 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 951 #if defined(PETSC_USE_MAT_SINGLE) 952 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 953 #endif 954 ierr = PetscFree(baij);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatMult_MPISBAIJ" 960 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 961 { 962 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 963 int ierr,nt,mbs=a->mbs,bs=a->bs; 964 PetscScalar *x,*from,zero=0.0; 965 966 PetscFunctionBegin; 967 /* 968 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 969 PetscSynchronizedFlush(A->comm); 970 */ 971 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 972 if (nt != A->n) { 973 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 974 } 975 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 976 if (nt != A->m) { 977 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 978 } 979 980 /* diagonal part */ 981 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 982 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 983 984 /* subdiagonal part */ 985 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 986 987 /* copy x into the vec slvec0 */ 988 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 989 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 990 ierr 
= PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 991 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 992 993 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 994 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 995 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 996 997 /* supperdiagonal part */ 998 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 999 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1005 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1006 { 1007 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1008 int ierr,nt; 1009 1010 PetscFunctionBegin; 1011 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1012 if (nt != A->n) { 1013 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1014 } 1015 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1016 if (nt != A->m) { 1017 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1018 } 1019 1020 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1021 /* do diagonal part */ 1022 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1023 /* do supperdiagonal part */ 1024 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1025 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1026 /* do subdiagonal part */ 1027 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1030 1031 PetscFunctionReturn(0); 1032 } 1033 1034 #undef __FUNCT__ 1035 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1036 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1037 { 1038 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1039 int ierr,mbs=a->mbs,bs=a->bs; 1040 PetscScalar 
*x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part: slvec1a = yy + A*xx; zero slvec1b so the scatter-add
     below accumulates only remote contributions */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part: slvec0b = B^T*xx (lower-triangle contribution by symmetry) */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the leading bs*mbs entries of the send vector slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* one scatter moves both x (for remote B rows) and the B^T*xx pieces */
  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   MatMultAdd_MPISBAIJ_2comm - Computes zz = yy + mat*xx with two scatters
   (forward for the superdiagonal part, reverse ADD for the subdiagonal part),
   the two-communication counterpart of MatMultAdd_MPISBAIJ() above.
*/
int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* start gathering ghost values of xx while the local product runs */
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part: zz = yy + A*xx */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part: zz += B*lvec */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part: scatter-add B^T*xx into owning processes' zz */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr =
VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
/* Transpose product is meaningless for a symmetric matrix: always errors,
   directing the caller to MatMult(). */
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
/* Same as above: symmetric matrix, use MatMultAdd() instead. */
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
/* MatGetDiagonal_MPISBAIJ - Extracts the diagonal of the local diagonal
   block a->A into v (valid because the diagonal of the global matrix lives
   entirely in the diagonal block). */
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
/* MatScale_MPISBAIJ - Scales the whole matrix by *aa by scaling both the
   diagonal (A) and off-diagonal (B) local blocks. */
int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/* MatGetRow_MPISBAIJ - Returns one locally owned row (global point index
   'row') by merging the A (diagonal) and B (off-diagonal) block rows into
   globally-numbered, column-sorted scratch arrays. Only local rows are
   supported; must be paired with MatRestoreRow_MPISBAIJ(). */
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart =
mat->cstart; 1150 1151 PetscFunctionBegin; 1152 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1153 mat->getrowactive = PETSC_TRUE; 1154 1155 if (!mat->rowvalues && (idx || v)) { 1156 /* 1157 allocate enough space to hold information from the longest row. 1158 */ 1159 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1160 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1161 int max = 1,mbs = mat->mbs,tmp; 1162 for (i=0; i<mbs; i++) { 1163 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1164 if (max < tmp) { max = tmp; } 1165 } 1166 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1167 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1168 } 1169 1170 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1171 lrow = row - brstart; /* local row index */ 1172 1173 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1174 if (!v) {pvA = 0; pvB = 0;} 1175 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1176 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1177 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1178 nztot = nzA + nzB; 1179 1180 cmap = mat->garray; 1181 if (v || idx) { 1182 if (nztot) { 1183 /* Sort by increasing column numbers, assuming A and B already sorted */ 1184 int imark = -1; 1185 if (v) { 1186 *v = v_p = mat->rowvalues; 1187 for (i=0; i<nzB; i++) { 1188 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1189 else break; 1190 } 1191 imark = i; 1192 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1193 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1194 } 1195 if (idx) { 1196 *idx = idx_p = mat->rowindices; 1197 if (imark > -1) { 1198 for (i=0; i<imark; i++) { 1199 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1200 } 1201 } else { 1202 for (i=0; i<nzB; i++) { 1203 if (cmap[cworkB[i]/bs] < cstart) 1204 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1205 else break; 1206 } 
1207 imark = i; 1208 } 1209 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1210 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1211 } 1212 } else { 1213 if (idx) *idx = 0; 1214 if (v) *v = 0; 1215 } 1216 } 1217 *nz = nztot; 1218 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1219 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1225 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1226 { 1227 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1228 1229 PetscFunctionBegin; 1230 if (baij->getrowactive == PETSC_FALSE) { 1231 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1232 } 1233 baij->getrowactive = PETSC_FALSE; 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1239 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1240 { 1241 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1242 1243 PetscFunctionBegin; 1244 *bs = baij->bs; 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1250 int MatZeroEntries_MPISBAIJ(Mat A) 1251 { 1252 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1253 int ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1257 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1263 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1264 { 1265 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1266 Mat A = a->A,B = a->B; 1267 int ierr; 1268 PetscReal isend[5],irecv[5]; 1269 1270 PetscFunctionBegin; 1271 info->block_size = (PetscReal)a->bs; 1272 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1273 isend[0] = info->nz_used; isend[1] = info->nz_allocated; 
isend[2] = info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;
  /* add in the off-diagonal block's local statistics */
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* per-field max over all processes */
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* per-field sum over all processes */
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  /* NOTE(review): A is the sequential diagonal block, so its global and
     local dimensions coincide; columns_local uses A->N (presumably
     equivalent to the local column count here — confirm for non-square use) */
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/* MatSetOption_MPISBAIJ - Dispatches a MatOption: most options are forwarded
   to both local blocks; a few only set flags on the parallel structure. */
int MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options simply forwarded to both the A and B blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr =
MatSetOption(a->A,op);CHKERRQ(ierr); 1328 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1329 break; 1330 case MAT_ROW_ORIENTED: 1331 a->roworiented = PETSC_TRUE; 1332 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1333 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1334 break; 1335 case MAT_ROWS_SORTED: 1336 case MAT_ROWS_UNSORTED: 1337 case MAT_YES_NEW_DIAGONALS: 1338 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1339 break; 1340 case MAT_COLUMN_ORIENTED: 1341 a->roworiented = PETSC_FALSE; 1342 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1343 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1344 break; 1345 case MAT_IGNORE_OFF_PROC_ENTRIES: 1346 a->donotstash = PETSC_TRUE; 1347 break; 1348 case MAT_NO_NEW_DIAGONALS: 1349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1350 case MAT_USE_HASH_TABLE: 1351 a->ht_flag = PETSC_TRUE; 1352 break; 1353 default: 1354 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1361 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1362 { 1363 int ierr; 1364 PetscFunctionBegin; 1365 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1371 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1372 { 1373 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1374 Mat a = baij->A,b = baij->B; 1375 int ierr,s1,s2,s3; 1376 1377 PetscFunctionBegin; 1378 if (ll != rr) { 1379 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1380 } 1381 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1382 if (rr) { 1383 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1384 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1385 /* Overlap communication with computation. 
*/
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    /*} if (ll) { */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while ghost values are in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
    /* } */
    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* if (rr) { */
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPISBAIJ"
/* Not implemented for the parallel symmetric format; always errors. */
int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
/* MatPrintHelp_MPISBAIJ - Prints the runtime options for this format; the
   sequential help is printed by rank 0 only, and the parallel options are
   printed at most once per run (guarded by the static 'called' flag). */
int MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm     comm = A->comm;
  static int   called = 0;
  int          ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
/* MatSetUnfactored_MPISBAIJ - Clears the factored state of the local
   diagonal block (only a->A is touched here). */
int MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
/* MatEqual_MPISBAIJ - Compares two MPISBAIJ matrices block-wise: the local
   diagonal blocks and off-diagonal blocks are compared separately, and the
   per-process results are combined with a logical-AND allreduce so every
   process returns the same answer. */
int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat          a,b,c,d;
  PetscTruth   flg;
  int          ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* short-circuit: only compare the B blocks if the A blocks already match */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
/* Default preallocation used when the caller never preallocated explicitly:
   block size 1 with PETSC_DEFAULT nonzero counts. */
int MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
/* Function table for the MPISBAIJ type; slot numbers follow _MatOps. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPISBAIJ,
  MatGetRow_MPISBAIJ,
  MatRestoreRow_MPISBAIJ,
  MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
  MatMultTranspose_MPISBAIJ,
  MatMultTransposeAdd_MPISBAIJ,
  0,
  0,
  0,
/*10*/ 0,
  0,
  0,
  MatRelax_MPISBAIJ,
  MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
  MatEqual_MPISBAIJ,
  MatGetDiagonal_MPISBAIJ,
  MatDiagonalScale_MPISBAIJ,
  MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
  MatAssemblyEnd_MPISBAIJ,
  0,
  MatSetOption_MPISBAIJ,
  MatZeroEntries_MPISBAIJ,
/*25*/ MatZeroRows_MPISBAIJ,
  0,
  0,
  0,
  0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
  0,
  0,
  0,
  0,
/*35*/ MatDuplicate_MPISBAIJ,
  0,
  0,
  0,
  0,
/*40*/ 0,
  0,
  0,
  MatGetValues_MPISBAIJ,
  0,
/*45*/
MatPrintHelp_MPISBAIJ, 1524 MatScale_MPISBAIJ, 1525 0, 1526 0, 1527 0, 1528 /*50*/ MatGetBlockSize_MPISBAIJ, 1529 0, 1530 0, 1531 0, 1532 0, 1533 /*55*/ 0, 1534 0, 1535 MatSetUnfactored_MPISBAIJ, 1536 0, 1537 MatSetValuesBlocked_MPISBAIJ, 1538 /*60*/ 0, 1539 0, 1540 0, 1541 MatGetPetscMaps_Petsc, 1542 0, 1543 /*65*/ 0, 1544 0, 1545 0, 1546 0, 1547 0, 1548 /*70*/ MatGetRowMax_MPISBAIJ, 1549 0, 1550 0, 1551 0, 1552 0, 1553 /*75*/ 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 /*80*/ 0, 1559 0, 1560 0, 1561 0, 1562 0, 1563 /*85*/ MatLoad_MPISBAIJ 1564 }; 1565 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1570 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1571 { 1572 PetscFunctionBegin; 1573 *a = ((Mat_MPISBAIJ *)A->data)->A; 1574 *iscopy = PETSC_FALSE; 1575 PetscFunctionReturn(0); 1576 } 1577 EXTERN_C_END 1578 1579 EXTERN_C_BEGIN 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1582 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1583 { 1584 Mat_MPISBAIJ *b; 1585 int ierr,i,mbs,Mbs; 1586 1587 PetscFunctionBegin; 1588 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1589 1590 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1591 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1592 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1593 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1594 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1595 if (d_nnz) { 1596 for (i=0; i<B->m/bs; i++) { 1597 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1598 } 1599 } 1600 if (o_nnz) { 1601 for (i=0; i<B->m/bs; i++) { 1602 if (o_nnz[i] < 0) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1603 } 1604 } 1605 B->preallocated = PETSC_TRUE; 1606 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1607 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1608 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1609 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1610 1611 b = (Mat_MPISBAIJ*)B->data; 1612 mbs = B->m/bs; 1613 Mbs = B->M/bs; 1614 if (mbs*bs != B->m) { 1615 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1616 } 1617 1618 b->bs = bs; 1619 b->bs2 = bs*bs; 1620 b->mbs = mbs; 1621 b->nbs = mbs; 1622 b->Mbs = Mbs; 1623 b->Nbs = Mbs; 1624 1625 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1626 b->rowners[0] = 0; 1627 for (i=2; i<=b->size; i++) { 1628 b->rowners[i] += b->rowners[i-1]; 1629 } 1630 b->rstart = b->rowners[b->rank]; 1631 b->rend = b->rowners[b->rank+1]; 1632 b->cstart = b->rstart; 1633 b->cend = b->rend; 1634 for (i=0; i<=b->size; i++) { 1635 b->rowners_bs[i] = b->rowners[i]*bs; 1636 } 1637 b->rstart_bs = b-> rstart*bs; 1638 b->rend_bs = b->rend*bs; 1639 1640 b->cstart_bs = b->cstart*bs; 1641 b->cend_bs = b->cend*bs; 1642 1643 1644 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1645 PetscLogObjectParent(B,b->A); 1646 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1647 PetscLogObjectParent(B,b->B); 1648 1649 /* build cache for off array entries formed */ 1650 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1651 1652 PetscFunctionReturn(0); 1653 } 1654 EXTERN_C_END 1655 1656 /*MC 1657 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1658 based on block compressed sparse row format. 
Only the upper triangular portion of the matrix is stored. 1659 1660 Options Database Keys: 1661 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1662 1663 Level: beginner 1664 1665 .seealso: MatCreateMPISBAIJ 1666 M*/ 1667 1668 EXTERN_C_BEGIN 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatCreate_MPISBAIJ" 1671 int MatCreate_MPISBAIJ(Mat B) 1672 { 1673 Mat_MPISBAIJ *b; 1674 int ierr; 1675 PetscTruth flg; 1676 1677 PetscFunctionBegin; 1678 1679 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1680 B->data = (void*)b; 1681 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1682 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1683 1684 B->ops->destroy = MatDestroy_MPISBAIJ; 1685 B->ops->view = MatView_MPISBAIJ; 1686 B->mapping = 0; 1687 B->factor = 0; 1688 B->assembled = PETSC_FALSE; 1689 1690 B->insertmode = NOT_SET_VALUES; 1691 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1692 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1693 1694 /* build local table of row and column ownerships */ 1695 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1696 b->cowners = b->rowners + b->size + 2; 1697 b->rowners_bs = b->cowners + b->size + 2; 1698 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1699 1700 /* build cache for off array entries formed */ 1701 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1702 b->donotstash = PETSC_FALSE; 1703 b->colmap = PETSC_NULL; 1704 b->garray = PETSC_NULL; 1705 b->roworiented = PETSC_TRUE; 1706 1707 #if defined(PETSC_USE_MAT_SINGLE) 1708 /* stuff for MatSetValues_XXX in single precision */ 1709 b->setvalueslen = 0; 1710 b->setvaluescopy = PETSC_NULL; 1711 #endif 1712 1713 /* stuff used in block assembly */ 1714 b->barray = 0; 1715 1716 /* stuff used for matrix vector multiply */ 1717 b->lvec = 0; 1718 b->Mvctx = 0; 1719 b->slvec0 = 0; 1720 b->slvec0b = 0; 
1721 b->slvec1 = 0; 1722 b->slvec1a = 0; 1723 b->slvec1b = 0; 1724 b->sMvctx = 0; 1725 1726 /* stuff for MatGetRow() */ 1727 b->rowindices = 0; 1728 b->rowvalues = 0; 1729 b->getrowactive = PETSC_FALSE; 1730 1731 /* hash table stuff */ 1732 b->ht = 0; 1733 b->hd = 0; 1734 b->ht_size = 0; 1735 b->ht_flag = PETSC_FALSE; 1736 b->ht_fact = 0; 1737 b->ht_total_ct = 0; 1738 b->ht_insert_ct = 0; 1739 1740 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1741 if (flg) { 1742 PetscReal fact = 1.39; 1743 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1744 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1745 if (fact <= 1.0) fact = 1.39; 1746 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1747 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1748 } 1749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1750 "MatStoreValues_MPISBAIJ", 1751 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1752 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1753 "MatRetrieveValues_MPISBAIJ", 1754 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1755 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1756 "MatGetDiagonalBlock_MPISBAIJ", 1757 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1758 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1759 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1760 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 EXTERN_C_END 1764 1765 /*MC 1766 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1767 1768 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1769 and MATMPISBAIJ otherwise. 1770 1771 Options Database Keys: 1772 . 
-mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1773 1774 Level: beginner 1775 1776 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1777 M*/ 1778 1779 EXTERN_C_BEGIN 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatCreate_SBAIJ" 1782 int MatCreate_SBAIJ(Mat A) { 1783 int ierr,size; 1784 1785 PetscFunctionBegin; 1786 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr); 1787 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1788 if (size == 1) { 1789 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1790 } else { 1791 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1792 } 1793 PetscFunctionReturn(0); 1794 } 1795 EXTERN_C_END 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1799 /*@C 1800 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1801 the user should preallocate the matrix storage by setting the parameters 1802 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1803 performance can be increased by more than a factor of 50. 1804 1805 Collective on Mat 1806 1807 Input Parameters: 1808 + A - the matrix 1809 . bs - size of blockk 1810 . d_nz - number of block nonzeros per block row in diagonal portion of local 1811 submatrix (same for all local rows) 1812 . d_nnz - array containing the number of block nonzeros in the various block rows 1813 in the upper triangular and diagonal part of the in diagonal portion of the local 1814 (possibly different for each block row) or PETSC_NULL. You must leave room 1815 for the diagonal entry even if it is zero. 1816 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1817 submatrix (same for all local rows). 1818 - o_nnz - array containing the number of nonzeros in the various block rows of the 1819 off-diagonal portion of the local submatrix (possibly different for 1820 each block row) or PETSC_NULL. 1821 1822 1823 Options Database Keys: 1824 . 
-mat_no_unroll - uses code that does not unroll the loops in the 1825 block calculations (much slower) 1826 . -mat_block_size - size of the blocks to use 1827 1828 Notes: 1829 1830 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1831 than it must be used on all processors that share the object for that argument. 1832 1833 Storage Information: 1834 For a square global matrix we define each processor's diagonal portion 1835 to be its local rows and the corresponding columns (a square submatrix); 1836 each processor's off-diagonal portion encompasses the remainder of the 1837 local matrix (a rectangular submatrix). 1838 1839 The user can specify preallocated storage for the diagonal part of 1840 the local submatrix with either d_nz or d_nnz (not both). Set 1841 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1842 memory allocation. Likewise, specify preallocated storage for the 1843 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1844 1845 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1846 the figure below we depict these three local rows and all columns (0-11). 1847 1848 .vb 1849 0 1 2 3 4 5 6 7 8 9 10 11 1850 ------------------- 1851 row 3 | o o o d d d o o o o o o 1852 row 4 | o o o d d d o o o o o o 1853 row 5 | o o o d d d o o o o o o 1854 ------------------- 1855 .ve 1856 1857 Thus, any entries in the d locations are stored in the d (diagonal) 1858 submatrix, and any entries in the o locations are stored in the 1859 o (off-diagonal) submatrix. Note that the d matrix is stored in 1860 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1861 1862 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1863 plus the diagonal part of the d matrix, 1864 and o_nz should indicate the number of block nonzeros per row in the o matrix. 
1865 In general, for PDE problems in which most nonzeros are near the diagonal, 1866 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1867 or you will get TERRIBLE performance; see the users' manual chapter on 1868 matrices. 1869 1870 Level: intermediate 1871 1872 .keywords: matrix, block, aij, compressed row, sparse, parallel 1873 1874 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1875 @*/ 1876 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1877 { 1878 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1879 1880 PetscFunctionBegin; 1881 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1882 if (f) { 1883 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1884 } 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatCreateMPISBAIJ" 1890 /*@C 1891 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1892 (block compressed row). For good matrix assembly performance 1893 the user should preallocate the matrix storage by setting the parameters 1894 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1895 performance can be increased by more than a factor of 50. 1896 1897 Collective on MPI_Comm 1898 1899 Input Parameters: 1900 + comm - MPI communicator 1901 . bs - size of blockk 1902 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1903 This value should be the same as the local size used in creating the 1904 y vector for the matrix-vector product y = Ax. 1905 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1906 This value should be the same as the local size used in creating the 1907 x vector for the matrix-vector product y = Ax. 1908 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1909 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1910 . d_nz - number of block nonzeros per block row in diagonal portion of local 1911 submatrix (same for all local rows) 1912 . d_nnz - array containing the number of block nonzeros in the various block rows 1913 in the upper triangular portion of the in diagonal portion of the local 1914 (possibly different for each block block row) or PETSC_NULL. 1915 You must leave room for the diagonal entry even if it is zero. 1916 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1917 submatrix (same for all local rows). 1918 - o_nnz - array containing the number of nonzeros in the various block rows of the 1919 off-diagonal portion of the local submatrix (possibly different for 1920 each block row) or PETSC_NULL. 1921 1922 Output Parameter: 1923 . A - the matrix 1924 1925 Options Database Keys: 1926 . -mat_no_unroll - uses code that does not unroll the loops in the 1927 block calculations (much slower) 1928 . -mat_block_size - size of the blocks to use 1929 . -mat_mpi - use the parallel matrix data structures even on one processor 1930 (defaults to using SeqBAIJ format on one processor) 1931 1932 Notes: 1933 The user MUST specify either the local or global matrix dimensions 1934 (possibly both). 1935 1936 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1937 than it must be used on all processors that share the object for that argument. 1938 1939 Storage Information: 1940 For a square global matrix we define each processor's diagonal portion 1941 to be its local rows and the corresponding columns (a square submatrix); 1942 each processor's off-diagonal portion encompasses the remainder of the 1943 local matrix (a rectangular submatrix). 1944 1945 The user can specify preallocated storage for the diagonal part of 1946 the local submatrix with either d_nz or d_nnz (not both). 
Set 1947 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1948 memory allocation. Likewise, specify preallocated storage for the 1949 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1950 1951 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1952 the figure below we depict these three local rows and all columns (0-11). 1953 1954 .vb 1955 0 1 2 3 4 5 6 7 8 9 10 11 1956 ------------------- 1957 row 3 | o o o d d d o o o o o o 1958 row 4 | o o o d d d o o o o o o 1959 row 5 | o o o d d d o o o o o o 1960 ------------------- 1961 .ve 1962 1963 Thus, any entries in the d locations are stored in the d (diagonal) 1964 submatrix, and any entries in the o locations are stored in the 1965 o (off-diagonal) submatrix. Note that the d matrix is stored in 1966 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1967 1968 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1969 plus the diagonal part of the d matrix, 1970 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1971 In general, for PDE problems in which most nonzeros are near the diagonal, 1972 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1973 or you will get TERRIBLE performance; see the users' manual chapter on 1974 matrices. 
1975 1976 Level: intermediate 1977 1978 .keywords: matrix, block, aij, compressed row, sparse, parallel 1979 1980 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1981 @*/ 1982 1983 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 1984 { 1985 int ierr,size; 1986 1987 PetscFunctionBegin; 1988 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1989 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1990 if (size > 1) { 1991 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1992 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1993 } else { 1994 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1995 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1996 } 1997 PetscFunctionReturn(0); 1998 } 1999 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2003 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2004 { 2005 Mat mat; 2006 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2007 int ierr,len=0; 2008 2009 PetscFunctionBegin; 2010 *newmat = 0; 2011 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2012 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2013 mat->preallocated = PETSC_TRUE; 2014 a = (Mat_MPISBAIJ*)mat->data; 2015 a->bs = oldmat->bs; 2016 a->bs2 = oldmat->bs2; 2017 a->mbs = oldmat->mbs; 2018 a->nbs = oldmat->nbs; 2019 a->Mbs = oldmat->Mbs; 2020 a->Nbs = oldmat->Nbs; 2021 2022 a->rstart = oldmat->rstart; 2023 a->rend = oldmat->rend; 2024 a->cstart = oldmat->cstart; 2025 a->cend = oldmat->cend; 2026 a->size = oldmat->size; 2027 a->rank = oldmat->rank; 2028 a->donotstash = oldmat->donotstash; 2029 a->roworiented = oldmat->roworiented; 2030 a->rowindices = 0; 2031 a->rowvalues = 0; 2032 a->getrowactive = PETSC_FALSE; 2033 a->barray = 0; 2034 a->rstart_bs = oldmat->rstart_bs; 2035 a->rend_bs = oldmat->rend_bs; 2036 
a->cstart_bs = oldmat->cstart_bs; 2037 a->cend_bs = oldmat->cend_bs; 2038 2039 /* hash table stuff */ 2040 a->ht = 0; 2041 a->hd = 0; 2042 a->ht_size = 0; 2043 a->ht_flag = oldmat->ht_flag; 2044 a->ht_fact = oldmat->ht_fact; 2045 a->ht_total_ct = 0; 2046 a->ht_insert_ct = 0; 2047 2048 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2049 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2050 a->cowners = a->rowners + a->size + 2; 2051 a->rowners_bs = a->cowners + a->size + 2; 2052 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2053 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2054 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2055 if (oldmat->colmap) { 2056 #if defined (PETSC_USE_CTABLE) 2057 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2058 #else 2059 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2060 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2061 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2062 #endif 2063 } else a->colmap = 0; 2064 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2065 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2066 PetscLogObjectMemory(mat,len*sizeof(int)); 2067 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2068 } else a->garray = 0; 2069 2070 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2071 PetscLogObjectParent(mat,a->lvec); 2072 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2073 2074 PetscLogObjectParent(mat,a->Mvctx); 2075 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2076 PetscLogObjectParent(mat,a->A); 2077 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2078 PetscLogObjectParent(mat,a->B); 2079 ierr = 
PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2080 *newmat = mat; 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #include "petscsys.h" 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatLoad_MPISBAIJ" 2088 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2089 { 2090 Mat A; 2091 int i,nz,ierr,j,rstart,rend,fd; 2092 PetscScalar *vals,*buf; 2093 MPI_Comm comm = ((PetscObject)viewer)->comm; 2094 MPI_Status status; 2095 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2096 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2097 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2098 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2099 int dcount,kmax,k,nzcount,tmp; 2100 2101 PetscFunctionBegin; 2102 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2103 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2106 if (!rank) { 2107 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2108 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2109 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2110 if (header[3] < 0) { 2111 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2112 } 2113 } 2114 2115 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2116 M = header[1]; N = header[2]; 2117 2118 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2119 2120 /* 2121 This code adds extra rows to make sure the number of rows is 2122 divisible by the blocksize 2123 */ 2124 Mbs = M/bs; 2125 extra_rows = bs - M + bs*(Mbs); 2126 if (extra_rows == bs) extra_rows = 0; 2127 else Mbs++; 2128 if (extra_rows &&!rank) { 2129 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2130 } 2131 2132 /* determine ownership of all rows */ 2133 mbs = 
        Mbs/size + ((Mbs % size) > rank); /* even split of block rows; low ranks absorb the remainder */
  m   = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;        /* second half of the allocation: ownership in scalar rows */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix-sum the per-process counts into ownership offsets (block rows) */
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];               /* first block row owned by this process */
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* padding rows carry a single (diagonal) entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* uniprocessor: the padding entries are not on disk; read fewer and
       append diagonal indices for the padding rows afterwards */
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens = dlens + (rend-rstart);
  /* mask marks block columns already counted for the current block row;
     masked1/masked2 remember which marks to clear afterwards */
  ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  /* columns in the file appear in sorted order within each row */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padding rows get 1.0 on the diagonal to keep the matrix nonsingular */
    if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc: i == size-1 here after the loop above */
    if (size != 1){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr =
             MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.
fact - factor 2327 2328 Collective on Mat 2329 2330 Level: advanced 2331 2332 Notes: 2333 This can also be set by the command line option: -mat_use_hash_table fact 2334 2335 .keywords: matrix, hashtable, factor, HT 2336 2337 .seealso: MatSetOption() 2338 @*/ 2339 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2340 { 2341 PetscFunctionBegin; 2342 SETERRQ(1,"Function not yet written for SBAIJ format"); 2343 /* PetscFunctionReturn(0); */ 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2348 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2349 { 2350 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2351 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2352 PetscReal atmp; 2353 PetscReal *work,*svalues,*rvalues; 2354 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2355 int rank,size,*rowners_bs,dest,count,source; 2356 PetscScalar *va; 2357 MatScalar *ba; 2358 MPI_Status stat; 2359 2360 PetscFunctionBegin; 2361 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2362 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2363 2364 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2365 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2366 2367 bs = a->bs; 2368 mbs = a->mbs; 2369 Mbs = a->Mbs; 2370 ba = b->a; 2371 bi = b->i; 2372 bj = b->j; 2373 /* 2374 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2375 PetscSynchronizedFlush(A->comm); 2376 */ 2377 2378 /* find ownerships */ 2379 rowners_bs = a->rowners_bs; 2380 /* 2381 if (!rank){ 2382 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2383 } 2384 */ 2385 2386 /* each proc creates an array to be distributed */ 2387 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2388 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2389 2390 /* row_max for B */ 2391 if (rank != size-1){ 2392 for (i=0; i<mbs; i++) { 2393 ncols = bi[1] - bi[0]; bi++; 2394 brow = bs*i; 2395 for (j=0; j<ncols; j++){ 2396 
bcol = bs*(*bj); 2397 for (kcol=0; kcol<bs; kcol++){ 2398 col = bcol + kcol; /* local col index */ 2399 col += rowners_bs[rank+1]; /* global col index */ 2400 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2401 for (krow=0; krow<bs; krow++){ 2402 atmp = PetscAbsScalar(*ba); ba++; 2403 row = brow + krow; /* local row index */ 2404 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2405 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2406 if (work[col] < atmp) work[col] = atmp; 2407 } 2408 } 2409 bj++; 2410 } 2411 } 2412 /* 2413 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2414 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2415 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2416 */ 2417 2418 /* send values to its owners */ 2419 for (dest=rank+1; dest<size; dest++){ 2420 svalues = work + rowners_bs[dest]; 2421 count = rowners_bs[dest+1]-rowners_bs[dest]; 2422 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2423 /* 2424 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2425 PetscSynchronizedFlush(A->comm); 2426 */ 2427 } 2428 } 2429 2430 /* receive values */ 2431 if (rank){ 2432 rvalues = work; 2433 count = rowners_bs[rank+1]-rowners_bs[rank]; 2434 for (source=0; source<rank; source++){ 2435 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2436 /* process values */ 2437 for (i=0; i<count; i++){ 2438 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2439 } 2440 /* 2441 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2442 PetscSynchronizedFlush(A->comm); 2443 */ 2444 } 2445 } 2446 2447 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2448 ierr = PetscFree(work);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define 
__FUNCT__ "MatRelax_MPISBAIJ" 2454 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2455 { 2456 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2457 int ierr,mbs=mat->mbs,bs=mat->bs; 2458 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2459 Vec bb1; 2460 2461 PetscFunctionBegin; 2462 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2463 if (bs > 1) 2464 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2465 2466 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2467 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2468 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2469 its--; 2470 } 2471 2472 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2473 while (its--){ 2474 2475 /* lower triangular part: slvec0b = - B^T*xx */ 2476 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2477 2478 /* copy xx into slvec0a */ 2479 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2481 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2482 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2483 2484 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2485 2486 /* copy bb into slvec1a */ 2487 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2488 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2489 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2490 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2491 2492 /* set slvec1b = 0 */ 2493 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2494 2495 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2496 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2497 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2498 ierr = 
             VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
/* Alternative SSOR relaxation (bs==1 only) using two scatters on Mvctx:
   a forward scatter of xx into lvec and a reverse scatter accumulating the
   lower-triangle contribution -B^T*xx into bb1. */
int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  int          ierr;
  PetscScalar  mone=-1.0;
  Vec          lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
  if (mat->bs > 1)
    SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if ( flag & SOR_ZERO_INITIAL_GUESS ) {
      /* free first sweep: with a zero initial guess the off-diagonal terms vanish */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--){
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      /* NOTE(review): multadd writes bb1 while the reverse scatter into bb1 is
         still outstanding; presumably the scattered and locally-updated index
         sets are disjoint -- verify against the VecScatter implementation. */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
