1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatPrintHelp_SeqSBAIJ(Mat); 17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 24 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 25 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 26 converts the entries into single precision and then calls ..._MatScalar() to put them 27 into the single precision data structures. 28 */ 29 #if defined(PETSC_USE_MAT_SINGLE) 30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 35 #else 36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 37 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 39 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 41 #endif 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 46 int MatStoreValues_MPISBAIJ(Mat mat) 47 { 48 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 49 int ierr; 50 51 PetscFunctionBegin; 52 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 53 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 61 int MatRetrieveValues_MPISBAIJ(Mat mat) 62 { 63 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 64 int ierr; 65 66 PetscFunctionBegin; 67 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 68 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* 74 Local utility routine that creates a mapping from the global column 75 number to the local number in the off-diagonal part of the local 76 storage of the matrix. This is done in a non scable way since the 77 length of colmap equals the global matrix length. 78 */ 79 #undef __FUNCT__ 80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 } 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 271 { 272 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 280 b->setvalueslen = N; 281 } 282 vsingle = b->setvaluescopy; 283 for (i=0; i<N; i++) { 284 vsingle[i] = v[i]; 285 } 286 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 306 { 307 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 308 int ierr,i,N = m*n*b->bs2; 309 MatScalar *vsingle; 310 311 PetscFunctionBegin; 312 SETERRQ(1,"Function not yet written for SBAIJ format"); 313 /* PetscFunctionReturn(0); */ 314 } 315 #endif 316 317 /* Only add/insert a(i,j) with i<=j (blocks). 318 Any a(i,j) with i>j input by user is ingored. 319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 355 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1; 386 #else 387 col = baij->colmap[in[j]/bs] - 1; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } else col += in[j]%bs; 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 const MatScalar *value; 435 MatScalar *barray=baij->barray; 436 PetscTruth roworiented = baij->roworiented; 437 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 438 int rend=baij->rend,cstart=baij->cstart,stepval; 439 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 440 441 PetscFunctionBegin; 442 if(!barray) { 443 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 444 baij->barray = barray; 445 } 446 447 if (roworiented) { 448 stepval = (n-1)*bs; 449 } else { 450 stepval = (m-1)*bs; 451 } 452 for (i=0; i<m; i++) { 453 if (im[i] < 0) continue; 454 #if defined(PETSC_USE_BOPT_g) 455 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 456 #endif 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 for (j=0; j<n; j++) { 460 /* If NumCol = 1 then a copy is not required */ 461 if ((roworiented) && (n == 1)) { 462 barray = (MatScalar*) v + i*bs2; 463 } else if((!roworiented) && (m == 1)) { 464 barray = (MatScalar*) v + j*bs2; 465 } else { /* Here a copy is required */ 466 if (roworiented) { 467 value = v + i*(stepval+bs)*bs + j*bs; 468 } else { 469 value = v + j*(stepval+bs)*bs + i*bs; 470 } 471 for (ii=0; ii<bs; ii++,value+=stepval) { 472 for (jj=0; jj<bs; jj++) { 473 *barray++ = *value++; 474 } 475 } 476 barray -=bs2; 477 } 478 479 if (in[j] >= cstart && in[j] < cend){ 480 col = in[j] - cstart; 481 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 482 } 483 else if (in[j] < 0) continue; 484 #if defined(PETSC_USE_BOPT_g) 485 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 486 #endif 487 else { 488 if (mat->was_assembled) { 489 if (!baij->colmap) { 490 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 491 } 492 493 #if defined(PETSC_USE_BOPT_g) 494 #if defined (PETSC_USE_CTABLE) 495 { int data; 496 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 497 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 498 } 499 #else 500 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 501 #endif 502 #endif 503 #if defined (PETSC_USE_CTABLE) 504 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 505 col = (col - 1)/bs; 506 #else 507 col = (baij->colmap[in[j]] - 1)/bs; 508 #endif 509 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 510 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 511 col = in[j]; 512 } 513 } 514 else col = in[j]; 515 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 } 518 } else { 519 if (!baij->donotstash) { 520 if (roworiented) { 521 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 522 } else { 523 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 524 } 525 } 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 #define HASH_KEY 0.6180339887 532 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 533 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 534 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 537 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 538 { 539 PetscFunctionBegin; 540 SETERRQ(1,"Function not yet written for SBAIJ format"); 541 /* PetscFunctionReturn(0); */ 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 546 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 547 { 548 PetscFunctionBegin; 549 SETERRQ(1,"Function not yet written for SBAIJ format"); 550 /* PetscFunctionReturn(0); */ 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatGetValues_MPISBAIJ" 555 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 556 { 557 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 558 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 559 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 560 561 PetscFunctionBegin; 562 for (i=0; i<m; i++) { 563 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 564 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 565 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 566 row = idxm[i] - bsrstart; 567 for (j=0; j<n; j++) { 568 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]); 569 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 570 if (idxn[j] >= bscstart && idxn[j] < bscend){ 571 col = idxn[j] - bscstart; 572 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 573 } else { 574 if (!baij->colmap) { 575 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 576 } 577 #if defined (PETSC_USE_CTABLE) 578 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 579 data --; 580 #else 581 data = baij->colmap[idxn[j]/bs]-1; 582 #endif 583 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 584 else { 585 col = data + idxn[j]%bs; 586 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 587 } 588 } 589 } 590 } else { 591 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 592 } 593 } 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatNorm_MPISBAIJ" 599 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 600 { 601 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 602 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 603 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 604 int ierr; 605 PetscReal sum[2],*lnorm2; 606 607 PetscFunctionBegin; 608 if (baij->size == 1) { 609 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 610 } else { 611 if (type == NORM_FROBENIUS) { 612 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 613 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 614 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 615 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 616 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 617 /* 618 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 619 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 620 */ 621 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 622 /* 623 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 624 PetscSynchronizedFlush(mat->comm); */ 625 626 *norm = sqrt(sum[0] + 2*sum[1]); 627 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 628 } else { 629 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 Creates the hash table, and sets the table 637 This table is created only once. 638 If new entried need to be added to the matrix 639 then the hash table has to be destroyed and 640 recreated. 641 */ 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 644 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 645 { 646 PetscFunctionBegin; 647 SETERRQ(1,"Function not yet written for SBAIJ format"); 648 /* PetscFunctionReturn(0); */ 649 } 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 653 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 654 { 655 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 656 int ierr,nstash,reallocs; 657 InsertMode addv; 658 659 PetscFunctionBegin; 660 if (baij->donotstash) { 661 PetscFunctionReturn(0); 662 } 663 664 /* make sure all processors are either in INSERTMODE or ADDMODE */ 665 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 666 if (addv == (ADD_VALUES|INSERT_VALUES)) { 667 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 668 } 669 mat->insertmode = addv; /* in case this processor had no cache */ 670 671 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 672 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 673 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 674 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 675 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 676 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 682 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 685 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 686 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 687 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 688 int *row,*col,other_disassembled; 689 PetscTruth r1,r2,r3; 690 MatScalar *val; 691 InsertMode addv = mat->insertmode; 692 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 693 PetscTruth flag; 694 #endif 695 696 PetscFunctionBegin; 697 698 if (!baij->donotstash) { 699 while (1) { 700 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 701 /* 702 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 703 PetscSynchronizedFlush(mat->comm); 704 */ 705 if (!flg) break; 706 707 for (i=0; i<n;) { 708 /* Now identify the consecutive vals belonging to the same row */ 709 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 710 if (j < n) ncols = j-i; 711 else ncols = n-i; 712 /* Now assemble all these values with a single function call */ 713 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 /* Now process the block-stash. Since the values are stashed column-oriented, 719 set the roworiented flag to column oriented, and after MatSetValues() 720 restore the original flags */ 721 r1 = baij->roworiented; 722 r2 = a->roworiented; 723 r3 = b->roworiented; 724 baij->roworiented = PETSC_FALSE; 725 a->roworiented = PETSC_FALSE; 726 b->roworiented = PETSC_FALSE; 727 while (1) { 728 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 729 if (!flg) break; 730 731 for (i=0; i<n;) { 732 /* Now identify the consecutive vals belonging to the same row */ 733 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 734 if (j < n) ncols = j-i; 735 else ncols = n-i; 736 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 737 i = j; 738 } 739 } 740 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 741 baij->roworiented = r1; 742 a->roworiented = r2; 743 b->roworiented = r3; 744 } 745 746 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 748 749 /* determine if any processor has disassembled, if so we must 750 also disassemble ourselfs, in order that we may reassemble. */ 751 /* 752 if nonzero structure of submatrix B cannot change then we know that 753 no processor disassembled thus we can skip this stuff 754 */ 755 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 756 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 757 if (mat->was_assembled && !other_disassembled) { 758 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 759 } 760 } 761 762 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 763 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 764 } 765 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 767 768 #if defined(PETSC_USE_BOPT_g) 769 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 770 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 771 baij->ht_total_ct = 0; 772 baij->ht_insert_ct = 0; 773 } 774 #endif 775 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 776 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 777 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 778 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 779 } 780 781 if (baij->rowvalues) { 782 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 783 baij->rowvalues = 0; 784 } 785 786 #if defined(PETSC_HAVE_SPOOLES) 787 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 788 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 789 #endif 790 #if defined(PETSC_HAVE_MUMPS) 791 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 792 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 793 #endif 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 799 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 800 { 801 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 802 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 803 PetscTruth isascii,isdraw; 804 PetscViewer sviewer; 805 PetscViewerFormat format; 806 807 PetscFunctionBegin; 808 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 809 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 810 if (isascii) { 811 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 812 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 813 MatInfo info; 814 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 815 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 816 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 817 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 818 baij->bs,(int)info.memory);CHKERRQ(ierr); 819 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 820 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 821 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 822 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 823 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 824 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } else if (format == PETSC_VIEWER_ASCII_INFO) { 827 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 } 831 832 if (isdraw) { 833 PetscDraw draw; 834 PetscTruth isnull; 835 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 836 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 837 } 838 839 if (size == 1) { 840 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 841 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 842 } else { 843 /* assemble the entire matrix onto first processor. */ 844 Mat A; 845 Mat_SeqSBAIJ *Aloc; 846 Mat_SeqBAIJ *Bloc; 847 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 848 MatScalar *a; 849 850 if (!rank) { 851 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 852 } else { 853 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 854 } 855 PetscLogObjectParent(mat,A); 856 857 /* copy over the A part */ 858 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 859 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 860 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 861 862 for (i=0; i<mbs; i++) { 863 rvals[0] = bs*(baij->rstart + i); 864 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 865 for (j=ai[i]; j<ai[i+1]; j++) { 866 col = (baij->cstart+aj[j])*bs; 867 for (k=0; k<bs; k++) { 868 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 869 col++; a += bs; 870 } 871 } 872 } 873 /* copy over the B part */ 874 Bloc = (Mat_SeqBAIJ*)baij->B->data; 875 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 876 for (i=0; i<mbs; i++) { 877 rvals[0] = bs*(baij->rstart + i); 878 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 879 for (j=ai[i]; j<ai[i+1]; j++) { 880 col = baij->garray[aj[j]]*bs; 881 for (k=0; k<bs; k++) { 882 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 883 col++; a += bs; 884 } 885 } 886 } 887 ierr = PetscFree(rvals);CHKERRQ(ierr); 888 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 890 /* 891 Everyone has to call to draw the matrix since the graphics waits are 892 synchronized across all processors that share the PetscDraw object 893 */ 894 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 895 if (!rank) { 896 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 897 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 898 } 899 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 900 ierr = MatDestroy(A);CHKERRQ(ierr); 901 } 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatView_MPISBAIJ" 907 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 908 { 909 int ierr; 910 PetscTruth isascii,isdraw,issocket,isbinary; 911 912 PetscFunctionBegin; 913 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 914 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 915 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 916 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 917 if (isascii || isdraw || issocket || isbinary) { 918 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 919 } else { 920 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 921 } 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatDestroy_MPISBAIJ" 927 int MatDestroy_MPISBAIJ(Mat mat) 928 { 929 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 930 int ierr; 931 932 PetscFunctionBegin; 933 #if defined(PETSC_USE_LOG) 934 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 935 #endif 936 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 937 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 938 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 939 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 940 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 941 #if defined (PETSC_USE_CTABLE) 942 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 943 #else 944 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 945 #endif 946 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 947 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 948 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 949 if (baij->slvec0) { 950 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 951 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 952 } 953 if (baij->slvec1) { 954 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 955 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 956 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 957 } 958 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 959 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 960 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 961 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 962 #if defined(PETSC_USE_MAT_SINGLE) 963 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 964 #endif 965 ierr = PetscFree(baij);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMult_MPISBAIJ" 971 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 972 { 973 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 974 int ierr,nt,mbs=a->mbs,bs=a->bs; 975 PetscScalar *x,*from,zero=0.0; 976 977 PetscFunctionBegin; 978 /* 979 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 980 PetscSynchronizedFlush(A->comm); 981 */ 982 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 983 if (nt != A->n) { 984 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 985 } 986 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 987 if (nt != A->m) { 988 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 989 } 990 991 /* diagonal part */ 992 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 993 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 994 995 /* subdiagonal part */ 996 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 997 998 /* copy x into the vec slvec0 */ 999 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1000 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1001 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1002 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1003 1004 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1005 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1007 1008 /* supperdiagonal part */ 1009 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1010 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1016 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1017 { 1018 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1019 int ierr,nt; 1020 1021 PetscFunctionBegin; 1022 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1023 if (nt != A->n) { 1024 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1025 } 1026 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1027 if (nt != A->m) { 1028 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1029 } 1030 1031 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1032 /* do diagonal part */ 1033 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1034 /* do supperdiagonal part */ 1035 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1036 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1037 /* do subdiagonal part */ 1038 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1039 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1041 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1047 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1048 { 1049 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1050 int ierr,mbs=a->mbs,bs=a->bs; 1051 PetscScalar *x,*from,zero=0.0; 1052 1053 PetscFunctionBegin; 1054 /* 1055 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1056 PetscSynchronizedFlush(A->comm); 1057 */ 1058 /* diagonal part */ 1059 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1060 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1061 1062 /* subdiagonal part */ 1063 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1064 1065 /* copy x into the vec slvec0 */ 1066 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1067 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1068 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1069 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1070 1071 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1074 1075 /* supperdiagonal part */ 1076 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1077 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1083 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1084 { 1085 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1086 int ierr; 1087 1088 PetscFunctionBegin; 1089 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1090 /* do diagonal part */ 1091 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1092 /* do supperdiagonal part */ 1093 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1094 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1095 1096 /* do subdiagonal part */ 1097 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1098 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1099 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1100 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1106 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1107 { 1108 PetscFunctionBegin; 1109 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1110 /* PetscFunctionReturn(0); */ 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1115 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1116 { 1117 PetscFunctionBegin; 1118 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1119 /* PetscFunctionReturn(0); */ 1120 } 1121 1122 /* 1123 This only works correctly for square matrices where the subblock A->A is the 1124 diagonal block 1125 */ 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1128 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1129 { 1130 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1131 int ierr; 1132 1133 PetscFunctionBegin; 1134 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1135 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatScale_MPISBAIJ" 1141 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A) 1142 { 1143 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1144 int ierr; 1145 1146 PetscFunctionBegin; 1147 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1148 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1154 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1155 { 1156 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1157 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1158 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1159 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1160 int *cmap,*idx_p,cstart = mat->cstart; 1161 1162 PetscFunctionBegin; 1163 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1164 mat->getrowactive = PETSC_TRUE; 1165 1166 if (!mat->rowvalues && (idx || v)) { 1167 /* 1168 allocate enough space to hold information from the longest row. 1169 */ 1170 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1171 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1172 int max = 1,mbs = mat->mbs,tmp; 1173 for (i=0; i<mbs; i++) { 1174 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1175 if (max < tmp) { max = tmp; } 1176 } 1177 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1178 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1179 } 1180 1181 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1182 lrow = row - brstart; /* local row index */ 1183 1184 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1185 if (!v) {pvA = 0; pvB = 0;} 1186 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1187 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1188 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1189 nztot = nzA + nzB; 1190 1191 cmap = mat->garray; 1192 if (v || idx) { 1193 if (nztot) { 1194 /* Sort by increasing column numbers, assuming A and B already sorted */ 1195 int imark = -1; 1196 if (v) { 1197 *v = v_p = mat->rowvalues; 1198 for (i=0; i<nzB; i++) { 1199 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1200 else break; 1201 } 1202 imark = i; 1203 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1204 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1205 } 1206 if (idx) { 1207 *idx = idx_p = mat->rowindices; 1208 if (imark > -1) { 1209 for (i=0; i<imark; i++) { 1210 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1211 } 1212 } else { 1213 for (i=0; i<nzB; i++) { 1214 if (cmap[cworkB[i]/bs] < cstart) 1215 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1216 else break; 1217 } 1218 imark = i; 1219 } 1220 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1221 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1222 } 1223 } else { 1224 if (idx) *idx = 0; 1225 if (v) *v = 0; 1226 } 1227 } 1228 *nz = nztot; 1229 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1230 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1236 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1237 { 1238 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1239 1240 PetscFunctionBegin; 1241 if (baij->getrowactive == PETSC_FALSE) { 1242 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1243 } 1244 baij->getrowactive = PETSC_FALSE; 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1250 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1251 { 1252 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1253 1254 PetscFunctionBegin; 1255 *bs = baij->bs; 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1261 int MatZeroEntries_MPISBAIJ(Mat A) 1262 { 1263 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1264 int ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1268 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1274 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1275 { 1276 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1277 Mat A = a->A,B = a->B; 1278 int ierr; 1279 PetscReal isend[5],irecv[5]; 1280 1281 PetscFunctionBegin; 1282 info->block_size = (PetscReal)a->bs; 1283 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1284 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1285 isend[3] = info->memory; isend[4] = info->mallocs; 1286 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1287 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1288 isend[3] += info->memory; isend[4] += info->mallocs; 1289 if (flag == MAT_LOCAL) { 1290 info->nz_used = isend[0]; 1291 info->nz_allocated = isend[1]; 1292 info->nz_unneeded = isend[2]; 1293 info->memory = isend[3]; 1294 info->mallocs = isend[4]; 1295 } else if (flag == MAT_GLOBAL_MAX) { 1296 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1297 info->nz_used = irecv[0]; 1298 info->nz_allocated = irecv[1]; 1299 info->nz_unneeded = irecv[2]; 1300 info->memory = irecv[3]; 1301 info->mallocs = irecv[4]; 1302 } else if (flag == MAT_GLOBAL_SUM) { 1303 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1304 info->nz_used = irecv[0]; 1305 info->nz_allocated = irecv[1]; 1306 info->nz_unneeded = irecv[2]; 1307 info->memory = irecv[3]; 1308 info->mallocs = irecv[4]; 1309 } else { 1310 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1311 } 1312 info->rows_global = (PetscReal)A->M; 1313 info->columns_global = (PetscReal)A->N; 1314 info->rows_local = (PetscReal)A->m; 1315 info->columns_local = (PetscReal)A->N; 1316 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1317 info->fill_ratio_needed = 0; 1318 info->factor_mallocs = 0; 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1324 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1325 { 1326 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1327 int ierr; 1328 1329 PetscFunctionBegin; 1330 switch (op) { 1331 case MAT_NO_NEW_NONZERO_LOCATIONS: 1332 case MAT_YES_NEW_NONZERO_LOCATIONS: 1333 case MAT_COLUMNS_UNSORTED: 1334 case MAT_COLUMNS_SORTED: 1335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1336 case MAT_KEEP_ZEROED_ROWS: 1337 case MAT_NEW_NONZERO_LOCATION_ERR: 1338 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1339 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1340 break; 1341 case MAT_ROW_ORIENTED: 1342 a->roworiented = PETSC_TRUE; 1343 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1344 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1345 break; 1346 case MAT_ROWS_SORTED: 1347 case MAT_ROWS_UNSORTED: 1348 case MAT_YES_NEW_DIAGONALS: 1349 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1350 break; 1351 case MAT_COLUMN_ORIENTED: 1352 a->roworiented = PETSC_FALSE; 1353 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1354 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1355 break; 1356 case MAT_IGNORE_OFF_PROC_ENTRIES: 1357 a->donotstash = PETSC_TRUE; 1358 break; 1359 case MAT_NO_NEW_DIAGONALS: 1360 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1361 case MAT_USE_HASH_TABLE: 1362 a->ht_flag = PETSC_TRUE; 1363 break; 1364 default: 1365 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1372 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1373 { 1374 int ierr; 1375 PetscFunctionBegin; 1376 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1382 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1383 { 1384 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1385 Mat a = baij->A,b = baij->B; 1386 int ierr,s1,s2,s3; 1387 1388 PetscFunctionBegin; 1389 if (ll != rr) { 1390 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1391 } 1392 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1393 if (rr) { 1394 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1395 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1396 /* Overlap communication with computation. */ 1397 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1398 /*} if (ll) { */ 1399 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1400 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1401 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1402 /* } */ 1403 /* scale the diagonal block */ 1404 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1405 1406 /* if (rr) { */ 1407 /* Do a scatter end and then right scale the off-diagonal block */ 1408 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1409 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1410 } 1411 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1417 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag) 1418 { 1419 PetscFunctionBegin; 1420 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1425 int MatPrintHelp_MPISBAIJ(Mat A) 1426 { 1427 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1428 MPI_Comm comm = A->comm; 1429 static int called = 0; 1430 int ierr; 1431 1432 PetscFunctionBegin; 1433 if (!a->rank) { 1434 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1435 } 1436 if (called) {PetscFunctionReturn(0);} else called = 1; 1437 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1438 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1444 int MatSetUnfactored_MPISBAIJ(Mat A) 1445 { 1446 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1447 int ierr; 1448 1449 PetscFunctionBegin; 1450 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatEqual_MPISBAIJ" 1458 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1459 { 1460 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1461 Mat a,b,c,d; 1462 PetscTruth flg; 1463 int ierr; 1464 1465 PetscFunctionBegin; 1466 a = matA->A; b = matA->B; 1467 c = matB->A; d = matB->B; 1468 1469 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1470 if (flg == PETSC_TRUE) { 1471 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1472 } 1473 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1479 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1480 { 1481 int ierr; 1482 1483 PetscFunctionBegin; 1484 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 /* -------------------------------------------------------------------*/ 1488 static struct _MatOps MatOps_Values = { 1489 MatSetValues_MPISBAIJ, 1490 MatGetRow_MPISBAIJ, 1491 MatRestoreRow_MPISBAIJ, 1492 MatMult_MPISBAIJ, 1493 MatMultAdd_MPISBAIJ, 1494 MatMultTranspose_MPISBAIJ, 1495 MatMultTransposeAdd_MPISBAIJ, 1496 0, 1497 0, 1498 0, 1499 0, 1500 0, 1501 0, 1502 MatRelax_MPISBAIJ, 1503 MatTranspose_MPISBAIJ, 1504 MatGetInfo_MPISBAIJ, 1505 MatEqual_MPISBAIJ, 1506 MatGetDiagonal_MPISBAIJ, 1507 MatDiagonalScale_MPISBAIJ, 1508 MatNorm_MPISBAIJ, 1509 MatAssemblyBegin_MPISBAIJ, 1510 MatAssemblyEnd_MPISBAIJ, 1511 0, 1512 MatSetOption_MPISBAIJ, 1513 MatZeroEntries_MPISBAIJ, 1514 MatZeroRows_MPISBAIJ, 1515 0, 1516 0, 1517 0, 1518 0, 1519 MatSetUpPreallocation_MPISBAIJ, 1520 0, 1521 0, 1522 0, 1523 0, 1524 MatDuplicate_MPISBAIJ, 1525 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 MatGetValues_MPISBAIJ, 1533 0, 1534 MatPrintHelp_MPISBAIJ, 1535 MatScale_MPISBAIJ, 1536 0, 1537 0, 1538 0, 1539 MatGetBlockSize_MPISBAIJ, 1540 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 MatSetUnfactored_MPISBAIJ, 1547 0, 1548 MatSetValuesBlocked_MPISBAIJ, 1549 0, 1550 0, 1551 0, 1552 MatGetPetscMaps_Petsc, 1553 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 0, 1559 MatGetRowMax_MPISBAIJ}; 1560 1561 1562 EXTERN_C_BEGIN 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1565 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1566 { 1567 PetscFunctionBegin; 1568 *a = ((Mat_MPISBAIJ *)A->data)->A; 1569 *iscopy = PETSC_FALSE; 1570 PetscFunctionReturn(0); 1571 } 1572 EXTERN_C_END 1573 1574 EXTERN_C_BEGIN 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1577 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1578 { 1579 Mat_MPISBAIJ *b; 1580 int ierr,i,mbs,Mbs; 1581 1582 PetscFunctionBegin; 1583 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1584 1585 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1586 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1587 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1588 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1589 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1590 if (d_nnz) { 1591 for (i=0; i<B->m/bs; i++) { 1592 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1593 } 1594 } 1595 if (o_nnz) { 1596 for (i=0; i<B->m/bs; i++) { 1597 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1598 } 1599 } 1600 B->preallocated = PETSC_TRUE; 1601 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1602 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1603 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1604 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1605 1606 b = (Mat_MPISBAIJ*)B->data; 1607 mbs = B->m/bs; 1608 Mbs = B->M/bs; 1609 if (mbs*bs != B->m) { 1610 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1611 } 1612 1613 b->bs = bs; 1614 b->bs2 = bs*bs; 1615 b->mbs = mbs; 1616 b->nbs = mbs; 1617 b->Mbs = Mbs; 1618 b->Nbs = Mbs; 1619 1620 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1621 b->rowners[0] = 0; 1622 for (i=2; i<=b->size; i++) { 1623 b->rowners[i] += b->rowners[i-1]; 1624 } 1625 b->rstart = b->rowners[b->rank]; 1626 b->rend = b->rowners[b->rank+1]; 1627 b->cstart = b->rstart; 1628 b->cend = b->rend; 1629 for (i=0; i<=b->size; i++) { 1630 b->rowners_bs[i] = b->rowners[i]*bs; 1631 } 1632 b->rstart_bs = b-> rstart*bs; 1633 b->rend_bs = b->rend*bs; 1634 1635 b->cstart_bs = b->cstart*bs; 1636 b->cend_bs = b->cend*bs; 1637 1638 1639 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1640 PetscLogObjectParent(B,b->A); 1641 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1642 PetscLogObjectParent(B,b->B); 1643 1644 /* build cache for off array entries formed */ 1645 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1646 1647 PetscFunctionReturn(0); 1648 } 1649 EXTERN_C_END 1650 1651 EXTERN_C_BEGIN 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatCreate_MPISBAIJ" 1654 int MatCreate_MPISBAIJ(Mat B) 1655 { 1656 Mat_MPISBAIJ *b; 1657 int ierr; 1658 PetscTruth flg; 1659 1660 PetscFunctionBegin; 1661 1662 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1663 B->data = (void*)b; 1664 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1665 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1666 1667 B->ops->destroy = MatDestroy_MPISBAIJ; 1668 B->ops->view = MatView_MPISBAIJ; 1669 B->mapping = 0; 1670 B->factor = 0; 1671 B->assembled = PETSC_FALSE; 1672 1673 B->insertmode = NOT_SET_VALUES; 1674 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1675 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1676 1677 /* build local table of row and column ownerships */ 1678 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1679 b->cowners = b->rowners + b->size + 2; 1680 b->rowners_bs = b->cowners + b->size + 2; 1681 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1682 1683 /* build cache for off array entries formed */ 1684 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1685 b->donotstash = PETSC_FALSE; 1686 b->colmap = PETSC_NULL; 1687 b->garray = PETSC_NULL; 1688 b->roworiented = PETSC_TRUE; 1689 1690 #if defined(PETSC_USE_MAT_SINGLE) 1691 /* stuff for MatSetValues_XXX in single precision */ 1692 b->setvalueslen = 0; 1693 b->setvaluescopy = PETSC_NULL; 1694 #endif 1695 1696 /* stuff used in block assembly */ 1697 b->barray = 0; 1698 1699 /* stuff used for matrix vector multiply */ 1700 b->lvec = 0; 1701 b->Mvctx = 0; 1702 b->slvec0 = 0; 1703 b->slvec0b = 0; 1704 b->slvec1 = 0; 1705 b->slvec1a = 0; 1706 b->slvec1b = 0; 1707 b->sMvctx = 0; 1708 1709 /* stuff for MatGetRow() */ 1710 b->rowindices = 0; 1711 b->rowvalues = 0; 1712 b->getrowactive = PETSC_FALSE; 1713 1714 /* hash table stuff */ 1715 b->ht = 0; 1716 b->hd = 0; 1717 b->ht_size = 0; 1718 b->ht_flag = PETSC_FALSE; 1719 b->ht_fact = 0; 1720 b->ht_total_ct = 0; 1721 b->ht_insert_ct = 0; 1722 1723 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1724 if (flg) { 1725 PetscReal fact = 1.39; 1726 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1727 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1728 if (fact <= 1.0) fact = 1.39; 1729 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1730 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1731 } 1732 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1733 "MatStoreValues_MPISBAIJ", 1734 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1736 "MatRetrieveValues_MPISBAIJ", 1737 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1738 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1739 "MatGetDiagonalBlock_MPISBAIJ", 1740 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1742 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1743 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 EXTERN_C_END 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1750 /*@C 1751 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1752 the user should preallocate the matrix storage by setting the parameters 1753 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1754 performance can be increased by more than a factor of 50. 1755 1756 Collective on Mat 1757 1758 Input Parameters: 1759 + A - the matrix 1760 . bs - size of blockk 1761 . d_nz - number of block nonzeros per block row in diagonal portion of local 1762 submatrix (same for all local rows) 1763 . d_nnz - array containing the number of block nonzeros in the various block rows 1764 in the upper triangular and diagonal part of the in diagonal portion of the local 1765 (possibly different for each block row) or PETSC_NULL. You must leave room 1766 for the diagonal entry even if it is zero. 1767 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1768 submatrix (same for all local rows). 1769 - o_nnz - array containing the number of nonzeros in the various block rows of the 1770 off-diagonal portion of the local submatrix (possibly different for 1771 each block row) or PETSC_NULL. 1772 1773 1774 Options Database Keys: 1775 . -mat_no_unroll - uses code that does not unroll the loops in the 1776 block calculations (much slower) 1777 . -mat_block_size - size of the blocks to use 1778 1779 Notes: 1780 1781 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1782 than it must be used on all processors that share the object for that argument. 1783 1784 Storage Information: 1785 For a square global matrix we define each processor's diagonal portion 1786 to be its local rows and the corresponding columns (a square submatrix); 1787 each processor's off-diagonal portion encompasses the remainder of the 1788 local matrix (a rectangular submatrix). 1789 1790 The user can specify preallocated storage for the diagonal part of 1791 the local submatrix with either d_nz or d_nnz (not both). Set 1792 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1793 memory allocation. Likewise, specify preallocated storage for the 1794 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1795 1796 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1797 the figure below we depict these three local rows and all columns (0-11). 1798 1799 .vb 1800 0 1 2 3 4 5 6 7 8 9 10 11 1801 ------------------- 1802 row 3 | o o o d d d o o o o o o 1803 row 4 | o o o d d d o o o o o o 1804 row 5 | o o o d d d o o o o o o 1805 ------------------- 1806 .ve 1807 1808 Thus, any entries in the d locations are stored in the d (diagonal) 1809 submatrix, and any entries in the o locations are stored in the 1810 o (off-diagonal) submatrix. Note that the d matrix is stored in 1811 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1812 1813 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1814 plus the diagonal part of the d matrix, 1815 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1816 In general, for PDE problems in which most nonzeros are near the diagonal, 1817 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1818 or you will get TERRIBLE performance; see the users' manual chapter on 1819 matrices. 1820 1821 Level: intermediate 1822 1823 .keywords: matrix, block, aij, compressed row, sparse, parallel 1824 1825 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1826 @*/ 1827 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1828 { 1829 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 1830 1831 PetscFunctionBegin; 1832 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1833 if (f) { 1834 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1835 } 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatCreateMPISBAIJ" 1841 /*@C 1842 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1843 (block compressed row). For good matrix assembly performance 1844 the user should preallocate the matrix storage by setting the parameters 1845 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1846 performance can be increased by more than a factor of 50. 1847 1848 Collective on MPI_Comm 1849 1850 Input Parameters: 1851 + comm - MPI communicator 1852 . bs - size of blockk 1853 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1854 This value should be the same as the local size used in creating the 1855 y vector for the matrix-vector product y = Ax. 1856 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1857 This value should be the same as the local size used in creating the 1858 x vector for the matrix-vector product y = Ax. 1859 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1860 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1861 . d_nz - number of block nonzeros per block row in diagonal portion of local 1862 submatrix (same for all local rows) 1863 . d_nnz - array containing the number of block nonzeros in the various block rows 1864 in the upper triangular portion of the in diagonal portion of the local 1865 (possibly different for each block block row) or PETSC_NULL. 1866 You must leave room for the diagonal entry even if it is zero. 1867 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1868 submatrix (same for all local rows). 1869 - o_nnz - array containing the number of nonzeros in the various block rows of the 1870 off-diagonal portion of the local submatrix (possibly different for 1871 each block row) or PETSC_NULL. 1872 1873 Output Parameter: 1874 . A - the matrix 1875 1876 Options Database Keys: 1877 . -mat_no_unroll - uses code that does not unroll the loops in the 1878 block calculations (much slower) 1879 . -mat_block_size - size of the blocks to use 1880 . -mat_mpi - use the parallel matrix data structures even on one processor 1881 (defaults to using SeqBAIJ format on one processor) 1882 1883 Notes: 1884 The user MUST specify either the local or global matrix dimensions 1885 (possibly both). 1886 1887 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1888 than it must be used on all processors that share the object for that argument. 1889 1890 Storage Information: 1891 For a square global matrix we define each processor's diagonal portion 1892 to be its local rows and the corresponding columns (a square submatrix); 1893 each processor's off-diagonal portion encompasses the remainder of the 1894 local matrix (a rectangular submatrix). 1895 1896 The user can specify preallocated storage for the diagonal part of 1897 the local submatrix with either d_nz or d_nnz (not both). Set 1898 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1899 memory allocation. Likewise, specify preallocated storage for the 1900 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1901 1902 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1903 the figure below we depict these three local rows and all columns (0-11). 1904 1905 .vb 1906 0 1 2 3 4 5 6 7 8 9 10 11 1907 ------------------- 1908 row 3 | o o o d d d o o o o o o 1909 row 4 | o o o d d d o o o o o o 1910 row 5 | o o o d d d o o o o o o 1911 ------------------- 1912 .ve 1913 1914 Thus, any entries in the d locations are stored in the d (diagonal) 1915 submatrix, and any entries in the o locations are stored in the 1916 o (off-diagonal) submatrix. Note that the d matrix is stored in 1917 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1918 1919 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1920 plus the diagonal part of the d matrix, 1921 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1922 In general, for PDE problems in which most nonzeros are near the diagonal, 1923 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1924 or you will get TERRIBLE performance; see the users' manual chapter on 1925 matrices. 1926 1927 Level: intermediate 1928 1929 .keywords: matrix, block, aij, compressed row, sparse, parallel 1930 1931 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1932 @*/ 1933 1934 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 1935 { 1936 int ierr,size; 1937 1938 PetscFunctionBegin; 1939 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1940 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1941 if (size > 1) { 1942 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1943 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1944 } else { 1945 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1946 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1954 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1955 { 1956 Mat mat; 1957 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1958 int ierr,len=0; 1959 1960 PetscFunctionBegin; 1961 *newmat = 0; 1962 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1963 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1964 mat->preallocated = PETSC_TRUE; 1965 a = (Mat_MPISBAIJ*)mat->data; 1966 a->bs = oldmat->bs; 1967 a->bs2 = oldmat->bs2; 1968 a->mbs = oldmat->mbs; 1969 a->nbs = oldmat->nbs; 1970 a->Mbs = oldmat->Mbs; 1971 a->Nbs = oldmat->Nbs; 1972 1973 a->rstart = oldmat->rstart; 1974 a->rend = oldmat->rend; 1975 a->cstart = oldmat->cstart; 1976 a->cend = oldmat->cend; 1977 a->size = oldmat->size; 1978 a->rank = oldmat->rank; 1979 a->donotstash = oldmat->donotstash; 1980 a->roworiented = oldmat->roworiented; 1981 a->rowindices = 0; 1982 a->rowvalues = 0; 1983 a->getrowactive = PETSC_FALSE; 1984 a->barray = 0; 1985 a->rstart_bs = oldmat->rstart_bs; 1986 a->rend_bs = oldmat->rend_bs; 1987 a->cstart_bs = oldmat->cstart_bs; 1988 a->cend_bs = oldmat->cend_bs; 1989 1990 /* hash table stuff */ 1991 a->ht = 0; 1992 a->hd = 0; 1993 a->ht_size = 0; 1994 a->ht_flag = oldmat->ht_flag; 1995 a->ht_fact = oldmat->ht_fact; 1996 a->ht_total_ct = 0; 1997 a->ht_insert_ct = 0; 1998 1999 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2000 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2001 a->cowners = a->rowners + a->size + 2; 2002 a->rowners_bs = a->cowners + a->size + 2; 2003 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2004 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2005 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2006 if (oldmat->colmap) { 2007 #if defined (PETSC_USE_CTABLE) 2008 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2009 #else 2010 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2011 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2012 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2013 #endif 2014 } else a->colmap = 0; 2015 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2016 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2017 PetscLogObjectMemory(mat,len*sizeof(int)); 2018 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2019 } else a->garray = 0; 2020 2021 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2022 PetscLogObjectParent(mat,a->lvec); 2023 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2024 2025 PetscLogObjectParent(mat,a->Mvctx); 2026 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2027 PetscLogObjectParent(mat,a->A); 2028 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2029 PetscLogObjectParent(mat,a->B); 2030 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2031 *newmat = mat; 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #include "petscsys.h" 2036 2037 EXTERN_C_BEGIN 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatLoad_MPISBAIJ" 2040 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2041 { 2042 Mat A; 2043 int i,nz,ierr,j,rstart,rend,fd; 2044 PetscScalar *vals,*buf; 2045 MPI_Comm comm = ((PetscObject)viewer)->comm; 2046 MPI_Status status; 2047 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2048 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2049 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2050 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2051 int dcount,kmax,k,nzcount,tmp; 2052 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 2053 PetscTruth flag; 2054 #endif 2055 2056 PetscFunctionBegin; 2057 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2058 2059 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2060 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2061 if (!rank) { 2062 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2063 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2064 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2065 if (header[3] < 0) { 2066 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2067 } 2068 } 2069 2070 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2071 M = header[1]; N = header[2]; 2072 2073 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2074 2075 /* 2076 This code adds extra rows to make sure the number of rows is 2077 divisible by the blocksize 2078 */ 2079 Mbs = M/bs; 2080 extra_rows = bs - M + bs*(Mbs); 2081 if (extra_rows == bs) extra_rows = 0; 2082 else Mbs++; 2083 if (extra_rows &&!rank) { 2084 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2085 } 2086 2087 /* determine ownership of all rows */ 2088 mbs = Mbs/size + ((Mbs % size) > rank); 2089 m = mbs*bs; 2090 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2091 browners = rowners + size + 1; 2092 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2093 rowners[0] = 0; 2094 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2095 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2096 rstart = rowners[rank]; 2097 rend = rowners[rank+1]; 2098 2099 /* distribute row lengths to all processors */ 2100 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2101 if (!rank) { 2102 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2103 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2104 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2105 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2106 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2107 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2108 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2109 } else { 2110 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2111 } 2112 2113 if (!rank) { /* procs[0] */ 2114 /* calculate the number of nonzeros on each processor */ 2115 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2116 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2117 for (i=0; i<size; i++) { 2118 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2119 procsnz[i] += rowlengths[j]; 2120 } 2121 } 2122 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2123 2124 /* determine max buffer needed and allocate it */ 2125 maxnz = 0; 2126 for (i=0; i<size; i++) { 2127 maxnz = PetscMax(maxnz,procsnz[i]); 2128 } 2129 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2130 2131 /* read in my part of the matrix column indices */ 2132 nz = procsnz[0]; 2133 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2134 mycols = ibuf; 2135 if (size == 1) nz -= extra_rows; 2136 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2137 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2138 2139 /* read in every ones (except the last) and ship off */ 2140 for (i=1; i<size-1; i++) { 2141 nz = procsnz[i]; 2142 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2143 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2144 } 2145 /* read in the stuff for the last proc */ 2146 if (size != 1) { 2147 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2148 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2149 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2150 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2151 } 2152 ierr = PetscFree(cols);CHKERRQ(ierr); 2153 } else { /* procs[i], i>0 */ 2154 /* determine buffer space needed for message */ 2155 nz = 0; 2156 for (i=0; i<m; i++) { 2157 nz += locrowlens[i]; 2158 } 2159 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2160 mycols = ibuf; 2161 /* receive message of column indices*/ 2162 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2163 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2164 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2165 } 2166 2167 /* loop over local rows, determining number of off diagonal entries */ 2168 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2169 odlens = dlens + (rend-rstart); 2170 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2171 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2172 masked1 = mask + Mbs; 2173 masked2 = masked1 + Mbs; 2174 rowcount = 0; nzcount = 0; 2175 for (i=0; i<mbs; i++) { 2176 dcount = 0; 2177 odcount = 0; 2178 for (j=0; j<bs; j++) { 2179 kmax = locrowlens[rowcount]; 2180 for (k=0; k<kmax; k++) { 2181 tmp = mycols[nzcount++]/bs; /* block col. index */ 2182 if (!mask[tmp]) { 2183 mask[tmp] = 1; 2184 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2185 else masked1[dcount++] = tmp; /* entry in diag portion */ 2186 } 2187 } 2188 rowcount++; 2189 } 2190 2191 dlens[i] = dcount; /* d_nzz[i] */ 2192 odlens[i] = odcount; /* o_nzz[i] */ 2193 2194 /* zero out the mask elements we set */ 2195 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2196 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2197 } 2198 2199 /* create our matrix */ 2200 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2201 CHKERRQ(ierr); 2202 A = *newmat; 2203 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2204 2205 if (!rank) { 2206 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2207 /* read in my part of the matrix numerical values */ 2208 nz = procsnz[0]; 2209 vals = buf; 2210 mycols = ibuf; 2211 if (size == 1) nz -= extra_rows; 2212 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2213 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2214 2215 /* insert into matrix */ 2216 jj = rstart*bs; 2217 for (i=0; i<m; i++) { 2218 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2219 mycols += locrowlens[i]; 2220 vals += locrowlens[i]; 2221 jj++; 2222 } 2223 2224 /* read in other processors (except the last one) and ship out */ 2225 for (i=1; i<size-1; i++) { 2226 nz = procsnz[i]; 2227 vals = buf; 2228 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2229 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2230 } 2231 /* the last proc */ 2232 if (size != 1){ 2233 nz = procsnz[i] - extra_rows; 2234 vals = buf; 2235 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2236 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2237 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2238 } 2239 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2240 2241 } else { 2242 /* receive numeric values */ 2243 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2244 2245 /* receive message of values*/ 2246 vals = buf; 2247 mycols = ibuf; 2248 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2249 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2250 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2251 2252 /* insert into matrix */ 2253 jj = rstart*bs; 2254 for (i=0; i<m; i++) { 2255 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2256 mycols += locrowlens[i]; 2257 vals += locrowlens[i]; 2258 jj++; 2259 } 2260 } 2261 2262 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2263 ierr = PetscFree(buf);CHKERRQ(ierr); 2264 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2265 ierr = PetscFree(rowners);CHKERRQ(ierr); 2266 ierr = PetscFree(dlens);CHKERRQ(ierr); 2267 ierr = PetscFree(mask);CHKERRQ(ierr); 2268 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2269 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2270 #if defined(PETSC_HAVE_SPOOLES) 2271 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 2272 if (flag) { 2273 if (size == 1) { 2274 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2275 } else { 2276 ierr = MatUseSpooles_MPISBAIJ(A);CHKERRQ(ierr); 2277 } 2278 } 2279 #endif 2280 #if defined(PETSC_HAVE_MUMPS) 2281 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 2282 if (flag) { 2283 ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); 2284 } 2285 #endif 2286 PetscFunctionReturn(0); 2287 } 2288 EXTERN_C_END 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2292 /*@ 2293 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2294 2295 Input Parameters: 2296 . mat - the matrix 2297 . fact - factor 2298 2299 Collective on Mat 2300 2301 Level: advanced 2302 2303 Notes: 2304 This can also be set by the command line option: -mat_use_hash_table fact 2305 2306 .keywords: matrix, hashtable, factor, HT 2307 2308 .seealso: MatSetOption() 2309 @*/ 2310 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2311 { 2312 PetscFunctionBegin; 2313 SETERRQ(1,"Function not yet written for SBAIJ format"); 2314 /* PetscFunctionReturn(0); */ 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2319 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2320 { 2321 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2322 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2323 PetscReal atmp; 2324 PetscReal *work,*svalues,*rvalues; 2325 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2326 int rank,size,*rowners_bs,dest,count,source; 2327 PetscScalar *va; 2328 MatScalar *ba; 2329 MPI_Status stat; 2330 2331 PetscFunctionBegin; 2332 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2333 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2334 2335 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2336 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2337 2338 bs = a->bs; 2339 mbs = a->mbs; 2340 Mbs = a->Mbs; 2341 ba = b->a; 2342 bi = b->i; 2343 bj = b->j; 2344 /* 2345 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2346 PetscSynchronizedFlush(A->comm); 2347 */ 2348 2349 /* find ownerships */ 2350 rowners_bs = a->rowners_bs; 2351 /* 2352 if (!rank){ 2353 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2354 } 2355 */ 2356 2357 /* each proc creates an array to be distributed */ 2358 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2359 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2360 2361 /* row_max for B */ 2362 if (rank != size-1){ 2363 for (i=0; i<mbs; i++) { 2364 ncols = bi[1] - bi[0]; bi++; 2365 brow = bs*i; 2366 for (j=0; j<ncols; j++){ 2367 bcol = bs*(*bj); 2368 for (kcol=0; kcol<bs; kcol++){ 2369 col = bcol + kcol; /* local col index */ 2370 col += rowners_bs[rank+1]; /* global col index */ 2371 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2372 for (krow=0; krow<bs; krow++){ 2373 atmp = PetscAbsScalar(*ba); ba++; 2374 row = brow + krow; /* local row index */ 2375 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2376 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2377 if (work[col] < atmp) work[col] = atmp; 2378 } 2379 } 2380 bj++; 2381 } 2382 } 2383 /* 2384 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2385 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2386 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2387 */ 2388 2389 /* send values to its owners */ 2390 for (dest=rank+1; dest<size; dest++){ 2391 svalues = work + rowners_bs[dest]; 2392 count = rowners_bs[dest+1]-rowners_bs[dest]; 2393 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2394 /* 2395 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2396 PetscSynchronizedFlush(A->comm); 2397 */ 2398 } 2399 } 2400 2401 /* receive values */ 2402 if (rank){ 2403 rvalues = work; 2404 count = rowners_bs[rank+1]-rowners_bs[rank]; 2405 for (source=0; source<rank; source++){ 2406 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2407 /* process values */ 2408 for (i=0; i<count; i++){ 2409 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2410 } 2411 /* 2412 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2413 PetscSynchronizedFlush(A->comm); 2414 */ 2415 } 2416 } 2417 2418 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2419 ierr = PetscFree(work);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatRelax_MPISBAIJ" 2425 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2426 { 2427 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2428 int ierr,mbs=mat->mbs,bs=mat->bs; 2429 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2430 Vec bb1; 2431 2432 PetscFunctionBegin; 2433 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2434 if (bs > 1) 2435 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2436 2437 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2438 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2439 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2440 its--; 2441 } 2442 2443 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2444 while (its--){ 2445 2446 /* lower triangular part: slvec0b = - B^T*xx */ 2447 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2448 2449 /* copy xx into slvec0a */ 2450 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2451 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2452 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2453 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2454 2455 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2456 2457 /* copy bb into slvec1a */ 2458 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2459 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2460 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2461 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2462 2463 /* set slvec1b = 0 */ 2464 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2465 2466 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2468 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2469 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2470 2471 /* upper triangular part: bb1 = bb1 - B*x */ 2472 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2473 2474 /* local diagonal sweep */ 2475 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2476 } 2477 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2478 } else { 2479 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2486 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2487 { 2488 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2489 int ierr; 2490 PetscScalar mone=-1.0; 2491 Vec lvec1,bb1; 2492 2493 PetscFunctionBegin; 2494 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2495 if (mat->bs > 1) 2496 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2497 2498 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2499 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2500 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2501 its--; 2502 } 2503 2504 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2505 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2506 while (its--){ 2507 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2508 2509 /* lower diagonal part: bb1 = bb - B^T*xx */ 2510 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2511 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2512 2513 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2514 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2515 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2516 2517 /* upper diagonal part: bb1 = bb1 - B*x */ 2518 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2519 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2520 2521 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2522 2523 /* diagonal sweep */ 2524 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2525 } 2526 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2527 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2528 } else { 2529 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2530 } 2531 PetscFunctionReturn(0); 2532 } 2533 2534