1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatPrintHelp_SeqSBAIJ(Mat); 17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 24 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 25 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 26 converts the entries into single precision and then calls ..._MatScalar() to put them 27 into the single precision data structures. 28 */ 29 #if defined(PETSC_USE_MAT_SINGLE) 30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 #else 36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 37 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 39 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 41 #endif 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 46 int MatStoreValues_MPISBAIJ(Mat mat) 47 { 48 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 49 int ierr; 50 51 PetscFunctionBegin; 52 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 53 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 61 int MatRetrieveValues_MPISBAIJ(Mat mat) 62 { 63 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 64 int ierr; 65 66 PetscFunctionBegin; 67 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 68 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* 74 Local utility routine that creates a mapping from the global column 75 number to the local number in the off-diagonal part of the local 76 storage of the matrix. This is done in a non scable way since the 77 length of colmap equals the global matrix length. 78 */ 79 #undef __FUNCT__ 80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 } 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 271 { 272 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 280 b->setvalueslen = N; 281 } 282 vsingle = b->setvaluescopy; 283 for (i=0; i<N; i++) { 284 vsingle[i] = v[i]; 285 } 286 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 308 int ierr,i,N = m*n*b->bs2; 309 MatScalar *vsingle; 310 311 PetscFunctionBegin; 312 SETERRQ(1,"Function not yet written for SBAIJ format"); 313 /* PetscFunctionReturn(0); */ 314 } 315 #endif 316 317 /* Only add/insert a(i,j) with i<=j (blocks). 318 Any a(i,j) with i>j input by user is ingored. 319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 355 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1; 386 #else 387 col = baij->colmap[in[j]/bs] - 1; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } else col += in[j]%bs; 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 const MatScalar *value; 435 MatScalar *barray=baij->barray; 436 PetscTruth roworiented = baij->roworiented; 437 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 438 int rend=baij->rend,cstart=baij->cstart,stepval; 439 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 440 441 PetscFunctionBegin; 442 if(!barray) { 443 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 444 baij->barray = barray; 445 } 446 447 if (roworiented) { 448 stepval = (n-1)*bs; 449 } else { 450 stepval = (m-1)*bs; 451 } 452 for (i=0; i<m; i++) { 453 if (im[i] < 0) continue; 454 #if defined(PETSC_USE_BOPT_g) 455 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 456 #endif 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 for (j=0; j<n; j++) { 460 /* If NumCol = 1 then a copy is not required */ 461 if ((roworiented) && (n == 1)) { 462 barray = (MatScalar*) v + i*bs2; 463 } else if((!roworiented) && (m == 1)) { 464 barray = (MatScalar*) v + j*bs2; 465 } else { /* Here a copy is required */ 466 if (roworiented) { 467 value = v + i*(stepval+bs)*bs + j*bs; 468 } else { 469 value = v + j*(stepval+bs)*bs + i*bs; 470 } 471 for (ii=0; ii<bs; ii++,value+=stepval) { 472 for (jj=0; jj<bs; jj++) { 473 *barray++ = *value++; 474 } 475 } 476 barray -=bs2; 477 } 478 479 if (in[j] >= cstart && in[j] < cend){ 480 col = in[j] - cstart; 481 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 482 } 483 else if (in[j] < 0) continue; 484 #if defined(PETSC_USE_BOPT_g) 485 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 486 #endif 487 else { 488 if (mat->was_assembled) { 489 if (!baij->colmap) { 490 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 491 } 492 493 #if defined(PETSC_USE_BOPT_g) 494 #if defined (PETSC_USE_CTABLE) 495 { int data; 496 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 497 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 498 } 499 #else 500 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 501 #endif 502 #endif 503 #if defined (PETSC_USE_CTABLE) 504 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 505 col = (col - 1)/bs; 506 #else 507 col = (baij->colmap[in[j]] - 1)/bs; 508 #endif 509 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 510 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 511 col = in[j]; 512 } 513 } 514 else col = in[j]; 515 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 } 518 } else { 519 if (!baij->donotstash) { 520 if (roworiented) { 521 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 522 } else { 523 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 524 } 525 } 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 #define HASH_KEY 0.6180339887 532 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 533 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 534 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 537 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 538 { 539 PetscFunctionBegin; 540 SETERRQ(1,"Function not yet written for SBAIJ format"); 541 /* PetscFunctionReturn(0); */ 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 546 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 547 { 548 PetscFunctionBegin; 549 SETERRQ(1,"Function not yet written for SBAIJ format"); 550 /* PetscFunctionReturn(0); */ 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatGetValues_MPISBAIJ" 555 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 556 { 557 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 558 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 559 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 560 561 PetscFunctionBegin; 562 for (i=0; i<m; i++) { 563 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 564 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 565 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 566 row = idxm[i] - bsrstart; 567 for (j=0; j<n; j++) { 568 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 569 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 570 if (idxn[j] >= bscstart && idxn[j] < bscend){ 571 col = idxn[j] - bscstart; 572 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 573 } else { 574 if (!baij->colmap) { 575 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 576 } 577 #if defined (PETSC_USE_CTABLE) 578 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 579 data --; 580 #else 581 data = baij->colmap[idxn[j]/bs]-1; 582 #endif 583 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 584 else { 585 col = data + idxn[j]%bs; 586 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 587 } 588 } 589 } 590 } else { 591 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 592 } 593 } 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatNorm_MPISBAIJ" 599 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 600 { 601 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 602 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 603 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 604 int ierr; 605 PetscReal sum[2],*lnorm2; 606 607 PetscFunctionBegin; 608 if (baij->size == 1) { 609 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 610 } else { 611 if (type == NORM_FROBENIUS) { 612 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 613 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 614 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 615 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 616 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 617 /* 618 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 619 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 620 */ 621 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 622 /* 623 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 624 PetscSynchronizedFlush(mat->comm); */ 625 626 *norm = sqrt(sum[0] + 2*sum[1]); 627 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 628 } else { 629 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 Creates the hash table, and sets the table 637 This table is created only once. 638 If new entried need to be added to the matrix 639 then the hash table has to be destroyed and 640 recreated. 641 */ 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 644 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 645 { 646 PetscFunctionBegin; 647 SETERRQ(1,"Function not yet written for SBAIJ format"); 648 /* PetscFunctionReturn(0); */ 649 } 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 653 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 654 { 655 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 656 int ierr,nstash,reallocs; 657 InsertMode addv; 658 659 PetscFunctionBegin; 660 if (baij->donotstash) { 661 PetscFunctionReturn(0); 662 } 663 664 /* make sure all processors are either in INSERTMODE or ADDMODE */ 665 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 666 if (addv == (ADD_VALUES|INSERT_VALUES)) { 667 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 668 } 669 mat->insertmode = addv; /* in case this processor had no cache */ 670 671 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 672 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 673 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 674 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 675 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 676 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 682 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 685 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 686 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 687 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 688 int *row,*col,other_disassembled; 689 PetscTruth r1,r2,r3; 690 MatScalar *val; 691 InsertMode addv = mat->insertmode; 692 #if defined(PETSC_HAVE_MUMPS) 693 PetscTruth flag; 694 #endif 695 696 PetscFunctionBegin; 697 698 if (!baij->donotstash) { 699 while (1) { 700 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 701 /* 702 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 703 PetscSynchronizedFlush(mat->comm); 704 */ 705 if (!flg) break; 706 707 for (i=0; i<n;) { 708 /* Now identify the consecutive vals belonging to the same row */ 709 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 710 if (j < n) ncols = j-i; 711 else ncols = n-i; 712 /* Now assemble all these values with a single function call */ 713 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 /* Now process the block-stash. Since the values are stashed column-oriented, 719 set the roworiented flag to column oriented, and after MatSetValues() 720 restore the original flags */ 721 r1 = baij->roworiented; 722 r2 = a->roworiented; 723 r3 = b->roworiented; 724 baij->roworiented = PETSC_FALSE; 725 a->roworiented = PETSC_FALSE; 726 b->roworiented = PETSC_FALSE; 727 while (1) { 728 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 729 if (!flg) break; 730 731 for (i=0; i<n;) { 732 /* Now identify the consecutive vals belonging to the same row */ 733 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 734 if (j < n) ncols = j-i; 735 else ncols = n-i; 736 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 737 i = j; 738 } 739 } 740 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 741 baij->roworiented = r1; 742 a->roworiented = r2; 743 b->roworiented = r3; 744 } 745 746 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 748 749 /* determine if any processor has disassembled, if so we must 750 also disassemble ourselfs, in order that we may reassemble. */ 751 /* 752 if nonzero structure of submatrix B cannot change then we know that 753 no processor disassembled thus we can skip this stuff 754 */ 755 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 756 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 757 if (mat->was_assembled && !other_disassembled) { 758 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 759 } 760 } 761 762 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 763 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 764 } 765 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 767 768 #if defined(PETSC_USE_BOPT_g) 769 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 770 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 771 baij->ht_total_ct = 0; 772 baij->ht_insert_ct = 0; 773 } 774 #endif 775 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 776 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 777 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 778 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 779 } 780 781 if (baij->rowvalues) { 782 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 783 baij->rowvalues = 0; 784 } 785 786 #if defined(PETSC_HAVE_MUMPS) 787 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 788 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 789 #endif 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 795 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 796 { 797 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 798 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 799 PetscTruth isascii,isdraw; 800 PetscViewer sviewer; 801 PetscViewerFormat format; 802 803 PetscFunctionBegin; 804 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 805 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 806 if (isascii) { 807 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 808 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 809 MatInfo info; 810 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 811 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 812 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 813 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 814 baij->bs,(int)info.memory);CHKERRQ(ierr); 815 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 816 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 817 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 819 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 820 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } else if (format == PETSC_VIEWER_ASCII_INFO) { 823 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 } 827 828 if (isdraw) { 829 PetscDraw draw; 830 PetscTruth isnull; 831 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 832 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 833 } 834 835 if (size == 1) { 836 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 837 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 838 } else { 839 /* assemble the entire matrix onto first processor. */ 840 Mat A; 841 Mat_SeqSBAIJ *Aloc; 842 Mat_SeqBAIJ *Bloc; 843 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 844 MatScalar *a; 845 846 if (!rank) { 847 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 848 } else { 849 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 850 } 851 PetscLogObjectParent(mat,A); 852 853 /* copy over the A part */ 854 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 855 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 856 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 857 858 for (i=0; i<mbs; i++) { 859 rvals[0] = bs*(baij->rstart + i); 860 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 861 for (j=ai[i]; j<ai[i+1]; j++) { 862 col = (baij->cstart+aj[j])*bs; 863 for (k=0; k<bs; k++) { 864 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 865 col++; a += bs; 866 } 867 } 868 } 869 /* copy over the B part */ 870 Bloc = (Mat_SeqBAIJ*)baij->B->data; 871 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 872 for (i=0; i<mbs; i++) { 873 rvals[0] = bs*(baij->rstart + i); 874 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 875 for (j=ai[i]; j<ai[i+1]; j++) { 876 col = baij->garray[aj[j]]*bs; 877 for (k=0; k<bs; k++) { 878 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 879 col++; a += bs; 880 } 881 } 882 } 883 ierr = PetscFree(rvals);CHKERRQ(ierr); 884 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 885 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 /* 887 Everyone has to call to draw the matrix since the graphics waits are 888 synchronized across all processors that share the PetscDraw object 889 */ 890 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 891 if (!rank) { 892 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 893 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 894 } 895 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 896 ierr = MatDestroy(A);CHKERRQ(ierr); 897 } 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatView_MPISBAIJ" 903 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 904 { 905 int ierr; 906 PetscTruth isascii,isdraw,issocket,isbinary; 907 908 PetscFunctionBegin; 909 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 911 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 912 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 913 if (isascii || isdraw || issocket || isbinary) { 914 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 915 } else { 916 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 917 } 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatDestroy_MPISBAIJ" 923 int MatDestroy_MPISBAIJ(Mat mat) 924 { 925 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 926 int ierr; 927 928 PetscFunctionBegin; 929 #if defined(PETSC_USE_LOG) 930 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 931 #endif 932 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 933 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 934 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 935 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 936 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 937 #if defined (PETSC_USE_CTABLE) 938 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 939 #else 940 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 941 #endif 942 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 943 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 944 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 945 if (baij->slvec0) { 946 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 947 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 948 } 949 if (baij->slvec1) { 950 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 951 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 952 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 953 } 954 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 955 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 956 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 957 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 958 #if defined(PETSC_USE_MAT_SINGLE) 959 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 960 #endif 961 ierr = PetscFree(baij);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatMult_MPISBAIJ" 967 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 968 { 969 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 970 int ierr,nt,mbs=a->mbs,bs=a->bs; 971 PetscScalar *x,*from,zero=0.0; 972 973 PetscFunctionBegin; 974 /* 975 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 976 PetscSynchronizedFlush(A->comm); 977 */ 978 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 979 if (nt != A->n) { 980 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 981 } 982 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 983 if (nt != A->m) { 984 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 985 } 986 987 /* diagonal part */ 988 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 989 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 990 991 /* subdiagonal part */ 992 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 993 994 /* copy x into the vec slvec0 */ 995 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 996 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 997 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 998 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 999 1000 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1001 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1002 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1003 1004 /* supperdiagonal part */ 1005 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1006 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1012 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1013 { 1014 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1015 int ierr,nt; 1016 1017 PetscFunctionBegin; 1018 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1019 if (nt != A->n) { 1020 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1021 } 1022 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1023 if (nt != A->m) { 1024 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1025 } 1026 1027 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1028 /* do diagonal part */ 1029 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1030 /* do supperdiagonal part */ 1031 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1032 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1033 /* do subdiagonal part */ 1034 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1035 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1037 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1043 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1044 { 1045 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1046 int ierr,mbs=a->mbs,bs=a->bs; 1047 PetscScalar *x,*from,zero=0.0; 1048 1049 PetscFunctionBegin; 1050 /* 1051 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1052 PetscSynchronizedFlush(A->comm); 1053 */ 1054 /* diagonal part */ 1055 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1056 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1057 1058 /* subdiagonal part */ 1059 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1060 1061 /* copy x into the vec slvec0 */ 1062 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1063 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1064 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1065 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1066 1067 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1068 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1070 1071 /* supperdiagonal part */ 1072 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1079 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1080 { 1081 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1082 int ierr; 1083 1084 PetscFunctionBegin; 1085 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1086 /* do diagonal part */ 1087 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1088 /* do supperdiagonal part */ 1089 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1090 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1091 1092 /* do subdiagonal part */ 1093 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1094 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1102 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 PetscFunctionBegin; 1105 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1106 /* PetscFunctionReturn(0); */ 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1111 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1112 { 1113 PetscFunctionBegin; 1114 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1115 /* PetscFunctionReturn(0); */ 1116 } 1117 1118 /* 1119 This only works correctly for square matrices where the subblock A->A is the 1120 diagonal block 1121 */ 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1124 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1125 { 1126 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1127 int ierr; 1128 1129 PetscFunctionBegin; 1130 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1131 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatScale_MPISBAIJ" 1137 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1138 { 1139 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1140 int ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1144 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1150 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1151 { 1152 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1153 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1154 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1155 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1156 int *cmap,*idx_p,cstart = mat->cstart; 1157 1158 PetscFunctionBegin; 1159 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1160 mat->getrowactive = PETSC_TRUE; 1161 1162 if (!mat->rowvalues && (idx || v)) { 1163 /* 1164 allocate enough space to hold information from the longest row. 1165 */ 1166 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1167 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1168 int max = 1,mbs = mat->mbs,tmp; 1169 for (i=0; i<mbs; i++) { 1170 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1171 if (max < tmp) { max = tmp; } 1172 } 1173 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1174 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1175 } 1176 1177 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1178 lrow = row - brstart; /* local row index */ 1179 1180 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1181 if (!v) {pvA = 0; pvB = 0;} 1182 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1183 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1184 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1185 nztot = nzA + nzB; 1186 1187 cmap = mat->garray; 1188 if (v || idx) { 1189 if (nztot) { 1190 /* Sort by increasing column numbers, assuming A and B already sorted */ 1191 int imark = -1; 1192 if (v) { 1193 *v = v_p = mat->rowvalues; 1194 for (i=0; i<nzB; i++) { 1195 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1196 else break; 1197 } 1198 imark = i; 1199 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1200 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1201 } 1202 if (idx) { 1203 *idx = idx_p = mat->rowindices; 1204 if (imark > -1) { 1205 for (i=0; i<imark; i++) { 1206 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1207 } 1208 } else { 1209 for (i=0; i<nzB; i++) { 1210 if (cmap[cworkB[i]/bs] < cstart) 1211 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1212 else break; 1213 } 1214 imark = i; 1215 } 1216 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1217 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1218 } 1219 } else { 1220 if (idx) *idx = 0; 1221 if (v) *v = 0; 1222 } 1223 } 1224 *nz = nztot; 1225 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1232 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1233 { 1234 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1235 1236 PetscFunctionBegin; 1237 if (baij->getrowactive == PETSC_FALSE) { 1238 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1239 } 1240 baij->getrowactive = PETSC_FALSE; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1246 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1247 { 1248 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1249 1250 PetscFunctionBegin; 1251 *bs = baij->bs; 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1257 int MatZeroEntries_MPISBAIJ(Mat A) 1258 { 1259 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1260 int ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1264 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1270 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1271 { 1272 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1273 Mat A = a->A,B = a->B; 1274 int ierr; 1275 PetscReal isend[5],irecv[5]; 1276 1277 PetscFunctionBegin; 1278 info->block_size = (PetscReal)a->bs; 1279 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1280 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1281 isend[3] = info->memory; isend[4] = info->mallocs; 1282 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1283 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1284 isend[3] += info->memory; isend[4] += info->mallocs; 1285 if (flag == MAT_LOCAL) { 1286 info->nz_used = isend[0]; 1287 info->nz_allocated = isend[1]; 1288 info->nz_unneeded = isend[2]; 1289 info->memory = isend[3]; 1290 info->mallocs = isend[4]; 1291 } else if (flag == MAT_GLOBAL_MAX) { 1292 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1293 info->nz_used = irecv[0]; 1294 info->nz_allocated = irecv[1]; 1295 info->nz_unneeded = irecv[2]; 1296 info->memory = irecv[3]; 1297 info->mallocs = irecv[4]; 1298 } else if (flag == MAT_GLOBAL_SUM) { 1299 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1300 info->nz_used = irecv[0]; 1301 info->nz_allocated = irecv[1]; 1302 info->nz_unneeded = irecv[2]; 1303 info->memory = irecv[3]; 1304 info->mallocs = irecv[4]; 1305 } else { 1306 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1307 } 1308 info->rows_global = (PetscReal)A->M; 1309 info->columns_global = (PetscReal)A->N; 1310 info->rows_local = (PetscReal)A->m; 1311 info->columns_local = (PetscReal)A->N; 1312 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1313 info->fill_ratio_needed = 0; 1314 info->factor_mallocs = 0; 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1320 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1321 { 1322 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1323 int ierr; 1324 1325 PetscFunctionBegin; 1326 switch (op) { 1327 case MAT_NO_NEW_NONZERO_LOCATIONS: 1328 case MAT_YES_NEW_NONZERO_LOCATIONS: 1329 case MAT_COLUMNS_UNSORTED: 1330 case MAT_COLUMNS_SORTED: 1331 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1332 case MAT_KEEP_ZEROED_ROWS: 1333 case MAT_NEW_NONZERO_LOCATION_ERR: 1334 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1335 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1336 break; 1337 case MAT_ROW_ORIENTED: 1338 a->roworiented = PETSC_TRUE; 1339 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1340 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1341 break; 1342 case MAT_ROWS_SORTED: 1343 case MAT_ROWS_UNSORTED: 1344 case MAT_YES_NEW_DIAGONALS: 1345 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1346 break; 1347 case MAT_COLUMN_ORIENTED: 1348 a->roworiented = PETSC_FALSE; 1349 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1350 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1351 break; 1352 case MAT_IGNORE_OFF_PROC_ENTRIES: 1353 a->donotstash = PETSC_TRUE; 1354 break; 1355 case MAT_NO_NEW_DIAGONALS: 1356 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1357 case MAT_USE_HASH_TABLE: 1358 a->ht_flag = PETSC_TRUE; 1359 break; 1360 default: 1361 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1368 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1369 { 1370 int ierr; 1371 PetscFunctionBegin; 1372 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1378 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1379 { 1380 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1381 Mat a = baij->A,b = baij->B; 1382 int ierr,s1,s2,s3; 1383 1384 PetscFunctionBegin; 1385 if (ll != rr) { 1386 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1387 } 1388 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1389 if (rr) { 1390 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1391 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1392 /* Overlap communication with computation. */ 1393 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1394 /*} if (ll) { */ 1395 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1396 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1397 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1398 /* } */ 1399 /* scale the diagonal block */ 1400 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1401 1402 /* if (rr) { */ 1403 /* Do a scatter end and then right scale the off-diagonal block */ 1404 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1405 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1406 } 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1413 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1414 { 1415 PetscFunctionBegin; 1416 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1421 int MatPrintHelp_MPISBAIJ(Mat A) 1422 { 1423 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1424 MPI_Comm comm = A->comm; 1425 static int called = 0; 1426 int ierr; 1427 1428 PetscFunctionBegin; 1429 if (!a->rank) { 1430 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1431 } 1432 if (called) {PetscFunctionReturn(0);} else called = 1; 1433 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1434 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1440 int MatSetUnfactored_MPISBAIJ(Mat A) 1441 { 1442 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1443 int ierr; 1444 1445 PetscFunctionBegin; 1446 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 1450 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatEqual_MPISBAIJ" 1454 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1455 { 1456 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1457 Mat a,b,c,d; 1458 PetscTruth flg; 1459 int ierr; 1460 1461 PetscFunctionBegin; 1462 a = matA->A; b = matA->B; 1463 c = matB->A; d = matB->B; 1464 1465 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1466 if (flg == PETSC_TRUE) { 1467 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1468 } 1469 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1475 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1476 { 1477 int ierr; 1478 1479 PetscFunctionBegin; 1480 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 /* -------------------------------------------------------------------*/ 1484 static struct _MatOps MatOps_Values = { 1485 MatSetValues_MPISBAIJ, 1486 MatGetRow_MPISBAIJ, 1487 MatRestoreRow_MPISBAIJ, 1488 MatMult_MPISBAIJ, 1489 MatMultAdd_MPISBAIJ, 1490 MatMultTranspose_MPISBAIJ, 1491 MatMultTransposeAdd_MPISBAIJ, 1492 0, 1493 0, 1494 0, 1495 0, 1496 0, 1497 0, 1498 MatRelax_MPISBAIJ, 1499 MatTranspose_MPISBAIJ, 1500 MatGetInfo_MPISBAIJ, 1501 MatEqual_MPISBAIJ, 1502 MatGetDiagonal_MPISBAIJ, 1503 MatDiagonalScale_MPISBAIJ, 1504 MatNorm_MPISBAIJ, 1505 MatAssemblyBegin_MPISBAIJ, 1506 MatAssemblyEnd_MPISBAIJ, 1507 0, 1508 MatSetOption_MPISBAIJ, 1509 MatZeroEntries_MPISBAIJ, 1510 MatZeroRows_MPISBAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 MatSetUpPreallocation_MPISBAIJ, 1516 0, 1517 0, 1518 0, 1519 0, 1520 MatDuplicate_MPISBAIJ, 1521 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 0, 1528 MatGetValues_MPISBAIJ, 1529 0, 1530 MatPrintHelp_MPISBAIJ, 1531 MatScale_MPISBAIJ, 1532 0, 1533 0, 1534 0, 1535 MatGetBlockSize_MPISBAIJ, 1536 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 MatSetUnfactored_MPISBAIJ, 1543 0, 1544 MatSetValuesBlocked_MPISBAIJ, 1545 0, 1546 0, 1547 0, 1548 MatGetPetscMaps_Petsc, 1549 0, 1550 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 MatGetRowMax_MPISBAIJ}; 1556 1557 1558 EXTERN_C_BEGIN 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1561 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1562 { 1563 PetscFunctionBegin; 1564 *a = ((Mat_MPISBAIJ *)A->data)->A; 1565 *iscopy = PETSC_FALSE; 1566 PetscFunctionReturn(0); 1567 } 1568 EXTERN_C_END 1569 1570 EXTERN_C_BEGIN 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1573 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1574 { 1575 Mat_MPISBAIJ *b; 1576 int ierr,i,mbs,Mbs; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1580 1581 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1582 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1583 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1584 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1585 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1586 if (d_nnz) { 1587 for (i=0; i<B->m/bs; i++) { 1588 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1589 } 1590 } 1591 if (o_nnz) { 1592 for (i=0; i<B->m/bs; i++) { 1593 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1594 } 1595 } 1596 B->preallocated = PETSC_TRUE; 1597 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1598 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1599 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1600 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1601 1602 b = (Mat_MPISBAIJ*)B->data; 1603 mbs = B->m/bs; 1604 Mbs = B->M/bs; 1605 if (mbs*bs != B->m) { 1606 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1607 } 1608 1609 b->bs = bs; 1610 b->bs2 = bs*bs; 1611 b->mbs = mbs; 1612 b->nbs = mbs; 1613 b->Mbs = Mbs; 1614 b->Nbs = Mbs; 1615 1616 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1617 b->rowners[0] = 0; 1618 for (i=2; i<=b->size; i++) { 1619 b->rowners[i] += b->rowners[i-1]; 1620 } 1621 b->rstart = b->rowners[b->rank]; 1622 b->rend = b->rowners[b->rank+1]; 1623 b->cstart = b->rstart; 1624 b->cend = b->rend; 1625 for (i=0; i<=b->size; i++) { 1626 b->rowners_bs[i] = b->rowners[i]*bs; 1627 } 1628 b->rstart_bs = b-> rstart*bs; 1629 b->rend_bs = b->rend*bs; 1630 1631 b->cstart_bs = b->cstart*bs; 1632 b->cend_bs = b->cend*bs; 1633 1634 1635 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1636 PetscLogObjectParent(B,b->A); 1637 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1638 PetscLogObjectParent(B,b->B); 1639 1640 /* build cache for off array entries formed */ 1641 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1642 1643 PetscFunctionReturn(0); 1644 } 1645 EXTERN_C_END 1646 1647 EXTERN_C_BEGIN 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "MatCreate_MPISBAIJ" 1650 int MatCreate_MPISBAIJ(Mat B) 1651 { 1652 Mat_MPISBAIJ *b; 1653 int ierr; 1654 PetscTruth flg; 1655 1656 PetscFunctionBegin; 1657 1658 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1659 B->data = (void*)b; 1660 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1661 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1662 1663 B->ops->destroy = MatDestroy_MPISBAIJ; 1664 B->ops->view = MatView_MPISBAIJ; 1665 B->mapping = 0; 1666 B->factor = 0; 1667 B->assembled = PETSC_FALSE; 1668 1669 B->insertmode = NOT_SET_VALUES; 1670 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1671 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1672 1673 /* build local table of row and column ownerships */ 1674 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1675 b->cowners = b->rowners + b->size + 2; 1676 b->rowners_bs = b->cowners + b->size + 2; 1677 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1678 1679 /* build cache for off array entries formed */ 1680 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1681 b->donotstash = PETSC_FALSE; 1682 b->colmap = PETSC_NULL; 1683 b->garray = PETSC_NULL; 1684 b->roworiented = PETSC_TRUE; 1685 1686 #if defined(PETSC_USE_MAT_SINGLE) 1687 /* stuff for MatSetValues_XXX in single precision */ 1688 b->setvalueslen = 0; 1689 b->setvaluescopy = PETSC_NULL; 1690 #endif 1691 1692 /* stuff used in block assembly */ 1693 b->barray = 0; 1694 1695 /* stuff used for matrix vector multiply */ 1696 b->lvec = 0; 1697 b->Mvctx = 0; 1698 b->slvec0 = 0; 1699 b->slvec0b = 0; 1700 b->slvec1 = 0; 1701 b->slvec1a = 0; 1702 b->slvec1b = 0; 1703 b->sMvctx = 0; 1704 1705 /* stuff for MatGetRow() */ 1706 b->rowindices = 0; 1707 b->rowvalues = 0; 1708 b->getrowactive = PETSC_FALSE; 1709 1710 /* hash table stuff */ 1711 b->ht = 0; 1712 b->hd = 0; 1713 b->ht_size = 0; 1714 b->ht_flag = PETSC_FALSE; 1715 b->ht_fact = 0; 1716 b->ht_total_ct = 0; 1717 b->ht_insert_ct = 0; 1718 1719 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1720 if (flg) { 1721 PetscReal fact = 1.39; 1722 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1723 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1724 if (fact <= 1.0) fact = 1.39; 1725 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1726 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1727 } 1728 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1729 "MatStoreValues_MPISBAIJ", 1730 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1732 "MatRetrieveValues_MPISBAIJ", 1733 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1734 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1735 "MatGetDiagonalBlock_MPISBAIJ", 1736 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1737 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1738 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1739 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 EXTERN_C_END 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1746 /*@C 1747 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1748 the user should preallocate the matrix storage by setting the parameters 1749 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1750 performance can be increased by more than a factor of 50. 1751 1752 Collective on Mat 1753 1754 Input Parameters: 1755 + A - the matrix 1756 . bs - size of blockk 1757 . d_nz - number of block nonzeros per block row in diagonal portion of local 1758 submatrix (same for all local rows) 1759 . d_nnz - array containing the number of block nonzeros in the various block rows 1760 in the upper triangular and diagonal part of the in diagonal portion of the local 1761 (possibly different for each block row) or PETSC_NULL. You must leave room 1762 for the diagonal entry even if it is zero. 1763 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1764 submatrix (same for all local rows). 1765 - o_nnz - array containing the number of nonzeros in the various block rows of the 1766 off-diagonal portion of the local submatrix (possibly different for 1767 each block row) or PETSC_NULL. 1768 1769 1770 Options Database Keys: 1771 . -mat_no_unroll - uses code that does not unroll the loops in the 1772 block calculations (much slower) 1773 . -mat_block_size - size of the blocks to use 1774 1775 Notes: 1776 1777 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1778 than it must be used on all processors that share the object for that argument. 1779 1780 Storage Information: 1781 For a square global matrix we define each processor's diagonal portion 1782 to be its local rows and the corresponding columns (a square submatrix); 1783 each processor's off-diagonal portion encompasses the remainder of the 1784 local matrix (a rectangular submatrix). 1785 1786 The user can specify preallocated storage for the diagonal part of 1787 the local submatrix with either d_nz or d_nnz (not both). Set 1788 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1789 memory allocation. Likewise, specify preallocated storage for the 1790 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1791 1792 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1793 the figure below we depict these three local rows and all columns (0-11). 1794 1795 .vb 1796 0 1 2 3 4 5 6 7 8 9 10 11 1797 ------------------- 1798 row 3 | o o o d d d o o o o o o 1799 row 4 | o o o d d d o o o o o o 1800 row 5 | o o o d d d o o o o o o 1801 ------------------- 1802 .ve 1803 1804 Thus, any entries in the d locations are stored in the d (diagonal) 1805 submatrix, and any entries in the o locations are stored in the 1806 o (off-diagonal) submatrix. Note that the d matrix is stored in 1807 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1808 1809 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1810 plus the diagonal part of the d matrix, 1811 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1812 In general, for PDE problems in which most nonzeros are near the diagonal, 1813 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1814 or you will get TERRIBLE performance; see the users' manual chapter on 1815 matrices. 1816 1817 Level: intermediate 1818 1819 .keywords: matrix, block, aij, compressed row, sparse, parallel 1820 1821 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1822 @*/ 1823 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1824 { 1825 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1826 1827 PetscFunctionBegin; 1828 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1829 if (f) { 1830 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatCreateMPISBAIJ" 1837 /*@C 1838 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1839 (block compressed row). For good matrix assembly performance 1840 the user should preallocate the matrix storage by setting the parameters 1841 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1842 performance can be increased by more than a factor of 50. 1843 1844 Collective on MPI_Comm 1845 1846 Input Parameters: 1847 + comm - MPI communicator 1848 . bs - size of blockk 1849 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1850 This value should be the same as the local size used in creating the 1851 y vector for the matrix-vector product y = Ax. 1852 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1853 This value should be the same as the local size used in creating the 1854 x vector for the matrix-vector product y = Ax. 1855 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1856 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1857 . d_nz - number of block nonzeros per block row in diagonal portion of local 1858 submatrix (same for all local rows) 1859 . d_nnz - array containing the number of block nonzeros in the various block rows 1860 in the upper triangular portion of the in diagonal portion of the local 1861 (possibly different for each block block row) or PETSC_NULL. 1862 You must leave room for the diagonal entry even if it is zero. 1863 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1864 submatrix (same for all local rows). 1865 - o_nnz - array containing the number of nonzeros in the various block rows of the 1866 off-diagonal portion of the local submatrix (possibly different for 1867 each block row) or PETSC_NULL. 1868 1869 Output Parameter: 1870 . A - the matrix 1871 1872 Options Database Keys: 1873 . -mat_no_unroll - uses code that does not unroll the loops in the 1874 block calculations (much slower) 1875 . -mat_block_size - size of the blocks to use 1876 . -mat_mpi - use the parallel matrix data structures even on one processor 1877 (defaults to using SeqBAIJ format on one processor) 1878 1879 Notes: 1880 The user MUST specify either the local or global matrix dimensions 1881 (possibly both). 1882 1883 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1884 than it must be used on all processors that share the object for that argument. 1885 1886 Storage Information: 1887 For a square global matrix we define each processor's diagonal portion 1888 to be its local rows and the corresponding columns (a square submatrix); 1889 each processor's off-diagonal portion encompasses the remainder of the 1890 local matrix (a rectangular submatrix). 1891 1892 The user can specify preallocated storage for the diagonal part of 1893 the local submatrix with either d_nz or d_nnz (not both). Set 1894 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1895 memory allocation. Likewise, specify preallocated storage for the 1896 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1897 1898 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1899 the figure below we depict these three local rows and all columns (0-11). 1900 1901 .vb 1902 0 1 2 3 4 5 6 7 8 9 10 11 1903 ------------------- 1904 row 3 | o o o d d d o o o o o o 1905 row 4 | o o o d d d o o o o o o 1906 row 5 | o o o d d d o o o o o o 1907 ------------------- 1908 .ve 1909 1910 Thus, any entries in the d locations are stored in the d (diagonal) 1911 submatrix, and any entries in the o locations are stored in the 1912 o (off-diagonal) submatrix. Note that the d matrix is stored in 1913 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1914 1915 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1916 plus the diagonal part of the d matrix, 1917 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1918 In general, for PDE problems in which most nonzeros are near the diagonal, 1919 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1920 or you will get TERRIBLE performance; see the users' manual chapter on 1921 matrices. 1922 1923 Level: intermediate 1924 1925 .keywords: matrix, block, aij, compressed row, sparse, parallel 1926 1927 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1928 @*/ 1929 1930 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 1931 { 1932 int ierr,size; 1933 1934 PetscFunctionBegin; 1935 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1936 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1937 if (size > 1) { 1938 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1939 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1940 } else { 1941 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1942 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1943 } 1944 PetscFunctionReturn(0); 1945 } 1946 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1950 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1951 { 1952 Mat mat; 1953 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1954 int ierr,len=0; 1955 1956 PetscFunctionBegin; 1957 *newmat = 0; 1958 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1959 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1960 mat->preallocated = PETSC_TRUE; 1961 a = (Mat_MPISBAIJ*)mat->data; 1962 a->bs = oldmat->bs; 1963 a->bs2 = oldmat->bs2; 1964 a->mbs = oldmat->mbs; 1965 a->nbs = oldmat->nbs; 1966 a->Mbs = oldmat->Mbs; 1967 a->Nbs = oldmat->Nbs; 1968 1969 a->rstart = oldmat->rstart; 1970 a->rend = oldmat->rend; 1971 a->cstart = oldmat->cstart; 1972 a->cend = oldmat->cend; 1973 a->size = oldmat->size; 1974 a->rank = oldmat->rank; 1975 a->donotstash = oldmat->donotstash; 1976 a->roworiented = oldmat->roworiented; 1977 a->rowindices = 0; 1978 a->rowvalues = 0; 1979 a->getrowactive = PETSC_FALSE; 1980 a->barray = 0; 1981 a->rstart_bs = oldmat->rstart_bs; 1982 a->rend_bs = oldmat->rend_bs; 1983 a->cstart_bs = oldmat->cstart_bs; 1984 a->cend_bs = oldmat->cend_bs; 1985 1986 /* hash table stuff */ 1987 a->ht = 0; 1988 a->hd = 0; 1989 a->ht_size = 0; 1990 a->ht_flag = oldmat->ht_flag; 1991 a->ht_fact = oldmat->ht_fact; 1992 a->ht_total_ct = 0; 1993 a->ht_insert_ct = 0; 1994 1995 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1996 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1997 a->cowners = a->rowners + a->size + 2; 1998 a->rowners_bs = a->cowners + a->size + 2; 1999 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2000 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2001 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2002 if (oldmat->colmap) { 2003 #if defined (PETSC_USE_CTABLE) 2004 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2005 #else 2006 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2007 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2008 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2009 #endif 2010 } else a->colmap = 0; 2011 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2012 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2013 PetscLogObjectMemory(mat,len*sizeof(int)); 2014 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2015 } else a->garray = 0; 2016 2017 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2018 PetscLogObjectParent(mat,a->lvec); 2019 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2020 2021 PetscLogObjectParent(mat,a->Mvctx); 2022 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2023 PetscLogObjectParent(mat,a->A); 2024 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2025 PetscLogObjectParent(mat,a->B); 2026 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2027 *newmat = mat; 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #include "petscsys.h" 2032 2033 EXTERN_C_BEGIN 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatLoad_MPISBAIJ" 2036 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2037 { 2038 Mat A; 2039 int i,nz,ierr,j,rstart,rend,fd; 2040 PetscScalar *vals,*buf; 2041 MPI_Comm comm = ((PetscObject)viewer)->comm; 2042 MPI_Status status; 2043 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2044 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2045 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2046 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2047 int dcount,kmax,k,nzcount,tmp; 2048 #if defined(PETSC_HAVE_MUMPS) 2049 PetscTruth flag; 2050 #endif 2051 2052 PetscFunctionBegin; 2053 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2054 2055 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 if (!rank) { 2058 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2059 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2060 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2061 if (header[3] < 0) { 2062 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2063 } 2064 } 2065 2066 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2067 M = header[1]; N = header[2]; 2068 2069 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2070 2071 /* 2072 This code adds extra rows to make sure the number of rows is 2073 divisible by the blocksize 2074 */ 2075 Mbs = M/bs; 2076 extra_rows = bs - M + bs*(Mbs); 2077 if (extra_rows == bs) extra_rows = 0; 2078 else Mbs++; 2079 if (extra_rows &&!rank) { 2080 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2081 } 2082 2083 /* determine ownership of all rows */ 2084 mbs = Mbs/size + ((Mbs % size) > rank); 2085 m = mbs*bs; 2086 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2087 browners = rowners + size + 1; 2088 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2089 rowners[0] = 0; 2090 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2091 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2092 rstart = rowners[rank]; 2093 rend = rowners[rank+1]; 2094 2095 /* distribute row lengths to all processors */ 2096 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2097 if (!rank) { 2098 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2099 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2100 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2101 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2102 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2103 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2104 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2105 } else { 2106 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2107 } 2108 2109 if (!rank) { /* procs[0] */ 2110 /* calculate the number of nonzeros on each processor */ 2111 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2112 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2113 for (i=0; i<size; i++) { 2114 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2115 procsnz[i] += rowlengths[j]; 2116 } 2117 } 2118 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2119 2120 /* determine max buffer needed and allocate it */ 2121 maxnz = 0; 2122 for (i=0; i<size; i++) { 2123 maxnz = PetscMax(maxnz,procsnz[i]); 2124 } 2125 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2126 2127 /* read in my part of the matrix column indices */ 2128 nz = procsnz[0]; 2129 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2130 mycols = ibuf; 2131 if (size == 1) nz -= extra_rows; 2132 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2133 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2134 2135 /* read in every ones (except the last) and ship off */ 2136 for (i=1; i<size-1; i++) { 2137 nz = procsnz[i]; 2138 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2139 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2140 } 2141 /* read in the stuff for the last proc */ 2142 if (size != 1) { 2143 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2144 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2145 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2146 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2147 } 2148 ierr = PetscFree(cols);CHKERRQ(ierr); 2149 } else { /* procs[i], i>0 */ 2150 /* determine buffer space needed for message */ 2151 nz = 0; 2152 for (i=0; i<m; i++) { 2153 nz += locrowlens[i]; 2154 } 2155 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2156 mycols = ibuf; 2157 /* receive message of column indices*/ 2158 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2159 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2160 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2161 } 2162 2163 /* loop over local rows, determining number of off diagonal entries */ 2164 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2165 odlens = dlens + (rend-rstart); 2166 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2167 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2168 masked1 = mask + Mbs; 2169 masked2 = masked1 + Mbs; 2170 rowcount = 0; nzcount = 0; 2171 for (i=0; i<mbs; i++) { 2172 dcount = 0; 2173 odcount = 0; 2174 for (j=0; j<bs; j++) { 2175 kmax = locrowlens[rowcount]; 2176 for (k=0; k<kmax; k++) { 2177 tmp = mycols[nzcount++]/bs; /* block col. index */ 2178 if (!mask[tmp]) { 2179 mask[tmp] = 1; 2180 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2181 else masked1[dcount++] = tmp; /* entry in diag portion */ 2182 } 2183 } 2184 rowcount++; 2185 } 2186 2187 dlens[i] = dcount; /* d_nzz[i] */ 2188 odlens[i] = odcount; /* o_nzz[i] */ 2189 2190 /* zero out the mask elements we set */ 2191 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2192 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2193 } 2194 2195 /* create our matrix */ 2196 ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr); 2197 ierr = MatSetType(A,type);CHKERRQ(ierr); 2198 ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2199 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2200 2201 if (!rank) { 2202 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2203 /* read in my part of the matrix numerical values */ 2204 nz = procsnz[0]; 2205 vals = buf; 2206 mycols = ibuf; 2207 if (size == 1) nz -= extra_rows; 2208 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2209 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2210 2211 /* insert into matrix */ 2212 jj = rstart*bs; 2213 for (i=0; i<m; i++) { 2214 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2215 mycols += locrowlens[i]; 2216 vals += locrowlens[i]; 2217 jj++; 2218 } 2219 2220 /* read in other processors (except the last one) and ship out */ 2221 for (i=1; i<size-1; i++) { 2222 nz = procsnz[i]; 2223 vals = buf; 2224 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2225 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2226 } 2227 /* the last proc */ 2228 if (size != 1){ 2229 nz = procsnz[i] - extra_rows; 2230 vals = buf; 2231 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2232 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2233 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2234 } 2235 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2236 2237 } else { 2238 /* receive numeric values */ 2239 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2240 2241 /* receive message of values*/ 2242 vals = buf; 2243 mycols = ibuf; 2244 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2245 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2246 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2247 2248 /* insert into matrix */ 2249 jj = rstart*bs; 2250 for (i=0; i<m; i++) { 2251 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2252 mycols += locrowlens[i]; 2253 vals += locrowlens[i]; 2254 jj++; 2255 } 2256 } 2257 2258 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2259 ierr = PetscFree(buf);CHKERRQ(ierr); 2260 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2261 ierr = PetscFree(rowners);CHKERRQ(ierr); 2262 ierr = PetscFree(dlens);CHKERRQ(ierr); 2263 ierr = PetscFree(mask);CHKERRQ(ierr); 2264 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2265 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 #if defined(PETSC_HAVE_MUMPS) 2267 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 2268 if (flag) { 2269 ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); 2270 } 2271 #endif 2272 *newmat = A; 2273 PetscFunctionReturn(0); 2274 } 2275 EXTERN_C_END 2276 2277 #undef __FUNCT__ 2278 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2279 /*@ 2280 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2281 2282 Input Parameters: 2283 . mat - the matrix 2284 . fact - factor 2285 2286 Collective on Mat 2287 2288 Level: advanced 2289 2290 Notes: 2291 This can also be set by the command line option: -mat_use_hash_table fact 2292 2293 .keywords: matrix, hashtable, factor, HT 2294 2295 .seealso: MatSetOption() 2296 @*/ 2297 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2298 { 2299 PetscFunctionBegin; 2300 SETERRQ(1,"Function not yet written for SBAIJ format"); 2301 /* PetscFunctionReturn(0); */ 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2306 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2307 { 2308 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2309 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2310 PetscReal atmp; 2311 PetscReal *work,*svalues,*rvalues; 2312 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2313 int rank,size,*rowners_bs,dest,count,source; 2314 PetscScalar *va; 2315 MatScalar *ba; 2316 MPI_Status stat; 2317 2318 PetscFunctionBegin; 2319 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2320 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2321 2322 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2323 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2324 2325 bs = a->bs; 2326 mbs = a->mbs; 2327 Mbs = a->Mbs; 2328 ba = b->a; 2329 bi = b->i; 2330 bj = b->j; 2331 /* 2332 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2333 PetscSynchronizedFlush(A->comm); 2334 */ 2335 2336 /* find ownerships */ 2337 rowners_bs = a->rowners_bs; 2338 /* 2339 if (!rank){ 2340 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2341 } 2342 */ 2343 2344 /* each proc creates an array to be distributed */ 2345 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2346 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2347 2348 /* row_max for B */ 2349 if (rank != size-1){ 2350 for (i=0; i<mbs; i++) { 2351 ncols = bi[1] - bi[0]; bi++; 2352 brow = bs*i; 2353 for (j=0; j<ncols; j++){ 2354 bcol = bs*(*bj); 2355 for (kcol=0; kcol<bs; kcol++){ 2356 col = bcol + kcol; /* local col index */ 2357 col += rowners_bs[rank+1]; /* global col index */ 2358 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2359 for (krow=0; krow<bs; krow++){ 2360 atmp = PetscAbsScalar(*ba); ba++; 2361 row = brow + krow; /* local row index */ 2362 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2363 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2364 if (work[col] < atmp) work[col] = atmp; 2365 } 2366 } 2367 bj++; 2368 } 2369 } 2370 /* 2371 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2372 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2373 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2374 */ 2375 2376 /* send values to its owners */ 2377 for (dest=rank+1; dest<size; dest++){ 2378 svalues = work + rowners_bs[dest]; 2379 count = rowners_bs[dest+1]-rowners_bs[dest]; 2380 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2381 /* 2382 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2383 PetscSynchronizedFlush(A->comm); 2384 */ 2385 } 2386 } 2387 2388 /* receive values */ 2389 if (rank){ 2390 rvalues = work; 2391 count = rowners_bs[rank+1]-rowners_bs[rank]; 2392 for (source=0; source<rank; source++){ 2393 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2394 /* process values */ 2395 for (i=0; i<count; i++){ 2396 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2397 } 2398 /* 2399 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2400 PetscSynchronizedFlush(A->comm); 2401 */ 2402 } 2403 } 2404 2405 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2406 ierr = PetscFree(work);CHKERRQ(ierr); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatRelax_MPISBAIJ" 2412 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2413 { 2414 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2415 int ierr,mbs=mat->mbs,bs=mat->bs; 2416 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2417 Vec bb1; 2418 2419 PetscFunctionBegin; 2420 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2421 if (bs > 1) 2422 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2423 2424 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2425 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2426 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2427 its--; 2428 } 2429 2430 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2431 while (its--){ 2432 2433 /* lower triangular part: slvec0b = - B^T*xx */ 2434 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2435 2436 /* copy xx into slvec0a */ 2437 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2438 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2439 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2440 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2441 2442 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2443 2444 /* copy bb into slvec1a */ 2445 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2446 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2447 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2448 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2449 2450 /* set slvec1b = 0 */ 2451 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2452 2453 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2454 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2455 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2456 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2457 2458 /* upper triangular part: bb1 = bb1 - B*x */ 2459 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2460 2461 /* local diagonal sweep */ 2462 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2463 } 2464 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2465 } else { 2466 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2467 } 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2473 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2474 { 2475 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2476 int ierr; 2477 PetscScalar mone=-1.0; 2478 Vec lvec1,bb1; 2479 2480 PetscFunctionBegin; 2481 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2482 if (mat->bs > 1) 2483 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2484 2485 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2486 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2487 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2488 its--; 2489 } 2490 2491 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2492 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2493 while (its--){ 2494 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2495 2496 /* lower diagonal part: bb1 = bb - B^T*xx */ 2497 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2498 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2499 2500 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2501 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2502 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2503 2504 /* upper diagonal part: bb1 = bb1 - B*x */ 2505 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2507 2508 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2509 2510 /* diagonal sweep */ 2511 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2512 } 2513 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2514 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2515 } else { 2516 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2517 } 2518 PetscFunctionReturn(0); 2519 } 2520 2521