1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatPrintHelp_SeqSBAIJ(Mat); 17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 24 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 25 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 26 converts the entries into single precision and then calls ..._MatScalar() to put them 27 into the single precision data structures. 28 */ 29 #if defined(PETSC_USE_MAT_SINGLE) 30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 #else 36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 37 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 39 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 41 #endif 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 46 int MatStoreValues_MPISBAIJ(Mat mat) 47 { 48 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 49 int ierr; 50 51 PetscFunctionBegin; 52 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 53 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 61 int MatRetrieveValues_MPISBAIJ(Mat mat) 62 { 63 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 64 int ierr; 65 66 PetscFunctionBegin; 67 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 68 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* 74 Local utility routine that creates a mapping from the global column 75 number to the local number in the off-diagonal part of the local 76 storage of the matrix. This is done in a non scable way since the 77 length of colmap equals the global matrix length. 78 */ 79 #undef __FUNCT__ 80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 } 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 271 { 272 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 280 b->setvalueslen = N; 281 } 282 vsingle = b->setvaluescopy; 283 for (i=0; i<N; i++) { 284 vsingle[i] = v[i]; 285 } 286 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 306 { 307 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 308 int ierr,i,N = m*n*b->bs2; 309 MatScalar *vsingle; 310 311 PetscFunctionBegin; 312 SETERRQ(1,"Function not yet written for SBAIJ format"); 313 /* PetscFunctionReturn(0); */ 314 } 315 #endif 316 317 /* Only add/insert a(i,j) with i<=j (blocks). 318 Any a(i,j) with i>j input by user is ingored. 319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 355 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1; 386 #else 387 col = baij->colmap[in[j]/bs] - 1; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } else col += in[j]%bs; 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 MatScalar *value,*barray=baij->barray; 435 PetscTruth roworiented = baij->roworiented; 436 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 437 int rend=baij->rend,cstart=baij->cstart,stepval; 438 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 439 440 PetscFunctionBegin; 441 if(!barray) { 442 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 443 baij->barray = barray; 444 } 445 446 if (roworiented) { 447 stepval = (n-1)*bs; 448 } else { 449 stepval = (m-1)*bs; 450 } 451 for (i=0; i<m; i++) { 452 if (im[i] < 0) continue; 453 #if defined(PETSC_USE_BOPT_g) 454 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 455 #endif 456 if (im[i] >= rstart && im[i] < rend) { 457 row = im[i] - rstart; 458 for (j=0; j<n; j++) { 459 /* If NumCol = 1 then a copy is not required */ 460 if ((roworiented) && (n == 1)) { 461 barray = v + i*bs2; 462 } else if((!roworiented) && (m == 1)) { 463 barray = v + j*bs2; 464 } else { /* Here a copy is required */ 465 if (roworiented) { 466 value = v + i*(stepval+bs)*bs + j*bs; 467 } else { 468 value = v + j*(stepval+bs)*bs + i*bs; 469 } 470 for (ii=0; ii<bs; ii++,value+=stepval) { 471 for (jj=0; jj<bs; jj++) { 472 *barray++ = *value++; 473 } 474 } 475 barray -=bs2; 476 } 477 478 if (in[j] >= cstart && in[j] < cend){ 479 col = in[j] - cstart; 480 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 481 } 482 else if (in[j] < 0) continue; 483 #if defined(PETSC_USE_BOPT_g) 484 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 485 #endif 486 else { 487 if (mat->was_assembled) { 488 if (!baij->colmap) { 489 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 490 } 491 492 #if defined(PETSC_USE_BOPT_g) 493 #if defined (PETSC_USE_CTABLE) 494 { int data; 495 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 496 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 497 } 498 #else 499 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 500 #endif 501 #endif 502 #if defined (PETSC_USE_CTABLE) 503 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 504 col = (col - 1)/bs; 505 #else 506 col = (baij->colmap[in[j]] - 1)/bs; 507 #endif 508 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 509 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 510 col = in[j]; 511 } 512 } 513 else col = in[j]; 514 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 515 } 516 } 517 } else { 518 if (!baij->donotstash) { 519 if (roworiented) { 520 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 521 } else { 522 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 523 } 524 } 525 } 526 } 527 PetscFunctionReturn(0); 528 } 529 530 #define HASH_KEY 0.6180339887 531 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 532 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 533 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 536 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 537 { 538 PetscFunctionBegin; 539 SETERRQ(1,"Function not yet written for SBAIJ format"); 540 /* PetscFunctionReturn(0); */ 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 545 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 546 { 547 PetscFunctionBegin; 548 SETERRQ(1,"Function not yet written for SBAIJ format"); 549 /* PetscFunctionReturn(0); */ 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatGetValues_MPISBAIJ" 554 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 555 { 556 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 557 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 558 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 559 560 PetscFunctionBegin; 561 for (i=0; i<m; i++) { 562 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 563 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 564 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 565 row = idxm[i] - bsrstart; 566 for (j=0; j<n; j++) { 567 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 568 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 569 if (idxn[j] >= bscstart && idxn[j] < bscend){ 570 col = idxn[j] - bscstart; 571 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 572 } else { 573 if (!baij->colmap) { 574 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 575 } 576 #if defined (PETSC_USE_CTABLE) 577 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 578 data --; 579 #else 580 data = baij->colmap[idxn[j]/bs]-1; 581 #endif 582 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 583 else { 584 col = data + idxn[j]%bs; 585 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 586 } 587 } 588 } 589 } else { 590 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatNorm_MPISBAIJ" 598 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 599 { 600 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 601 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 602 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 603 int ierr; 604 PetscReal sum[2],*lnorm2; 605 606 PetscFunctionBegin; 607 if (baij->size == 1) { 608 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 609 } else { 610 if (type == NORM_FROBENIUS) { 611 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 612 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 613 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 614 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 615 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 616 /* 617 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 618 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 619 */ 620 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 621 /* 622 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 623 PetscSynchronizedFlush(mat->comm); */ 624 625 *norm = sqrt(sum[0] + 2*sum[1]); 626 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 627 } else { 628 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 /* 635 Creates the hash table, and sets the table 636 This table is created only once. 637 If new entried need to be added to the matrix 638 then the hash table has to be destroyed and 639 recreated. 640 */ 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 643 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 644 { 645 PetscFunctionBegin; 646 SETERRQ(1,"Function not yet written for SBAIJ format"); 647 /* PetscFunctionReturn(0); */ 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 652 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 655 int ierr,nstash,reallocs; 656 InsertMode addv; 657 658 PetscFunctionBegin; 659 if (baij->donotstash) { 660 PetscFunctionReturn(0); 661 } 662 663 /* make sure all processors are either in INSERTMODE or ADDMODE */ 664 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 665 if (addv == (ADD_VALUES|INSERT_VALUES)) { 666 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 667 } 668 mat->insertmode = addv; /* in case this processor had no cache */ 669 670 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 671 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 672 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 673 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 674 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 675 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 681 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 684 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 685 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 686 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 687 int *row,*col,other_disassembled; 688 PetscTruth r1,r2,r3; 689 MatScalar *val; 690 InsertMode addv = mat->insertmode; 691 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 692 PetscTruth flag; 693 #endif 694 695 PetscFunctionBegin; 696 697 if (!baij->donotstash) { 698 while (1) { 699 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 700 /* 701 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 702 PetscSynchronizedFlush(mat->comm); 703 */ 704 if (!flg) break; 705 706 for (i=0; i<n;) { 707 /* Now identify the consecutive vals belonging to the same row */ 708 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 713 i = j; 714 } 715 } 716 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 717 /* Now process the block-stash. Since the values are stashed column-oriented, 718 set the roworiented flag to column oriented, and after MatSetValues() 719 restore the original flags */ 720 r1 = baij->roworiented; 721 r2 = a->roworiented; 722 r3 = b->roworiented; 723 baij->roworiented = PETSC_FALSE; 724 a->roworiented = PETSC_FALSE; 725 b->roworiented = PETSC_FALSE; 726 while (1) { 727 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 728 if (!flg) break; 729 730 for (i=0; i<n;) { 731 /* Now identify the consecutive vals belonging to the same row */ 732 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 733 if (j < n) ncols = j-i; 734 else ncols = n-i; 735 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 736 i = j; 737 } 738 } 739 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 740 baij->roworiented = r1; 741 a->roworiented = r2; 742 b->roworiented = r3; 743 } 744 745 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 746 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 747 748 /* determine if any processor has disassembled, if so we must 749 also disassemble ourselfs, in order that we may reassemble. */ 750 /* 751 if nonzero structure of submatrix B cannot change then we know that 752 no processor disassembled thus we can skip this stuff 753 */ 754 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 755 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 756 if (mat->was_assembled && !other_disassembled) { 757 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 758 } 759 } 760 761 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 762 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 763 } 764 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 765 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 766 767 #if defined(PETSC_USE_BOPT_g) 768 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 769 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 770 baij->ht_total_ct = 0; 771 baij->ht_insert_ct = 0; 772 } 773 #endif 774 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 775 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 776 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 777 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 778 } 779 780 if (baij->rowvalues) { 781 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 782 baij->rowvalues = 0; 783 } 784 785 #if defined(PETSC_HAVE_SPOOLES) 786 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 787 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 788 #endif 789 #if defined(PETSC_HAVE_MUMPS) 790 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 791 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 792 #endif 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 798 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 799 { 800 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 801 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 802 PetscTruth isascii,isdraw; 803 PetscViewer sviewer; 804 PetscViewerFormat format; 805 806 PetscFunctionBegin; 807 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 808 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 809 if (isascii) { 810 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 811 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 812 MatInfo info; 813 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 814 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 816 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 817 baij->bs,(int)info.memory);CHKERRQ(ierr); 818 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 820 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 821 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 822 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 823 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } else if (format == PETSC_VIEWER_ASCII_INFO) { 826 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscTruth isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 836 } 837 838 if (size == 1) { 839 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 840 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 841 } else { 842 /* assemble the entire matrix onto first processor. */ 843 Mat A; 844 Mat_SeqSBAIJ *Aloc; 845 Mat_SeqBAIJ *Bloc; 846 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 847 MatScalar *a; 848 849 if (!rank) { 850 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 851 } else { 852 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 853 } 854 PetscLogObjectParent(mat,A); 855 856 /* copy over the A part */ 857 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 858 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 859 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 860 861 for (i=0; i<mbs; i++) { 862 rvals[0] = bs*(baij->rstart + i); 863 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 864 for (j=ai[i]; j<ai[i+1]; j++) { 865 col = (baij->cstart+aj[j])*bs; 866 for (k=0; k<bs; k++) { 867 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 868 col++; a += bs; 869 } 870 } 871 } 872 /* copy over the B part */ 873 Bloc = (Mat_SeqBAIJ*)baij->B->data; 874 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 875 for (i=0; i<mbs; i++) { 876 rvals[0] = bs*(baij->rstart + i); 877 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 878 for (j=ai[i]; j<ai[i+1]; j++) { 879 col = baij->garray[aj[j]]*bs; 880 for (k=0; k<bs; k++) { 881 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 882 col++; a += bs; 883 } 884 } 885 } 886 ierr = PetscFree(rvals);CHKERRQ(ierr); 887 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 /* 890 Everyone has to call to draw the matrix since the graphics waits are 891 synchronized across all processors that share the PetscDraw object 892 */ 893 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 894 if (!rank) { 895 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 896 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 897 } 898 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 899 ierr = MatDestroy(A);CHKERRQ(ierr); 900 } 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatView_MPISBAIJ" 906 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 907 { 908 int ierr; 909 PetscTruth isascii,isdraw,issocket,isbinary; 910 911 PetscFunctionBegin; 912 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 913 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 914 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 915 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 916 if (isascii || isdraw || issocket || isbinary) { 917 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 918 } else { 919 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 920 } 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "MatDestroy_MPISBAIJ" 926 int MatDestroy_MPISBAIJ(Mat mat) 927 { 928 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 929 int ierr; 930 931 PetscFunctionBegin; 932 #if defined(PETSC_USE_LOG) 933 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 934 #endif 935 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 936 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 937 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 938 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 939 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 940 #if defined (PETSC_USE_CTABLE) 941 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 942 #else 943 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 944 #endif 945 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 946 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 947 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 948 if (baij->slvec0) { 949 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 950 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 951 } 952 if (baij->slvec1) { 953 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 954 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 955 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 956 } 957 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 958 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 959 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 960 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 961 #if defined(PETSC_USE_MAT_SINGLE) 962 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 963 #endif 964 ierr = PetscFree(baij);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatMult_MPISBAIJ" 970 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 971 { 972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 973 int ierr,nt,mbs=a->mbs,bs=a->bs; 974 PetscScalar *x,*from,zero=0.0; 975 976 PetscFunctionBegin; 977 /* 978 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 979 PetscSynchronizedFlush(A->comm); 980 */ 981 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 982 if (nt != A->n) { 983 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 984 } 985 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 986 if (nt != A->m) { 987 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 988 } 989 990 /* diagonal part */ 991 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 992 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 993 994 /* subdiagonal part */ 995 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 996 997 /* copy x into the vec slvec0 */ 998 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 999 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1000 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1001 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1002 1003 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1006 1007 /* supperdiagonal part */ 1008 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1009 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1015 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1016 { 1017 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1018 int ierr,nt; 1019 1020 PetscFunctionBegin; 1021 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1022 if (nt != A->n) { 1023 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1024 } 1025 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1026 if (nt != A->m) { 1027 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1028 } 1029 1030 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1031 /* do diagonal part */ 1032 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1033 /* do supperdiagonal part */ 1034 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1035 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1036 /* do subdiagonal part */ 1037 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1038 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1039 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1040 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1046 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1047 { 1048 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1049 int ierr,mbs=a->mbs,bs=a->bs; 1050 PetscScalar *x,*from,zero=0.0; 1051 1052 PetscFunctionBegin; 1053 /* 1054 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1055 PetscSynchronizedFlush(A->comm); 1056 */ 1057 /* diagonal part */ 1058 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1059 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1060 1061 /* subdiagonal part */ 1062 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1063 1064 /* copy x into the vec slvec0 */ 1065 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1066 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1067 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1068 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1069 1070 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1072 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1073 1074 /* supperdiagonal part */ 1075 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1076 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1082 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1083 { 1084 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1085 int ierr; 1086 1087 PetscFunctionBegin; 1088 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1089 /* do diagonal part */ 1090 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1091 /* do supperdiagonal part */ 1092 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1093 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1094 1095 /* do subdiagonal part */ 1096 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1097 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1098 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1099 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1105 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1106 { 1107 PetscFunctionBegin; 1108 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1109 /* PetscFunctionReturn(0); */ 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1114 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1115 { 1116 PetscFunctionBegin; 1117 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1118 /* PetscFunctionReturn(0); */ 1119 } 1120 1121 /* 1122 This only works correctly for square matrices where the subblock A->A is the 1123 diagonal block 1124 */ 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1127 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1128 { 1129 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1130 int ierr; 1131 1132 PetscFunctionBegin; 1133 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1134 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatScale_MPISBAIJ" 1140 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 1141 { 1142 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1143 int ierr; 1144 1145 PetscFunctionBegin; 1146 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1147 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1153 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1154 { 1155 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1156 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1157 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1158 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1159 int *cmap,*idx_p,cstart = mat->cstart; 1160 1161 PetscFunctionBegin; 1162 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1163 mat->getrowactive = PETSC_TRUE; 1164 1165 if (!mat->rowvalues && (idx || v)) { 1166 /* 1167 allocate enough space to hold information from the longest row. 1168 */ 1169 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1170 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1171 int max = 1,mbs = mat->mbs,tmp; 1172 for (i=0; i<mbs; i++) { 1173 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1174 if (max < tmp) { max = tmp; } 1175 } 1176 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1177 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1178 } 1179 1180 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1181 lrow = row - brstart; /* local row index */ 1182 1183 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1184 if (!v) {pvA = 0; pvB = 0;} 1185 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1186 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1187 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1188 nztot = nzA + nzB; 1189 1190 cmap = mat->garray; 1191 if (v || idx) { 1192 if (nztot) { 1193 /* Sort by increasing column numbers, assuming A and B already sorted */ 1194 int imark = -1; 1195 if (v) { 1196 *v = v_p = mat->rowvalues; 1197 for (i=0; i<nzB; i++) { 1198 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1199 else break; 1200 } 1201 imark = i; 1202 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1203 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1204 } 1205 if (idx) { 1206 *idx = idx_p = mat->rowindices; 1207 if (imark > -1) { 1208 for (i=0; i<imark; i++) { 1209 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1210 } 1211 } else { 1212 for (i=0; i<nzB; i++) { 1213 if (cmap[cworkB[i]/bs] < cstart) 1214 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1215 else break; 1216 } 1217 imark = i; 1218 } 1219 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1220 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1221 } 1222 } else { 1223 if (idx) *idx = 0; 1224 if (v) *v = 0; 1225 } 1226 } 1227 *nz = nztot; 1228 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1229 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1235 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1236 { 1237 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1238 1239 PetscFunctionBegin; 1240 if (baij->getrowactive == PETSC_FALSE) { 1241 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1242 } 1243 baij->getrowactive = PETSC_FALSE; 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1249 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1250 { 1251 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1252 1253 PetscFunctionBegin; 1254 *bs = baij->bs; 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1260 int MatZeroEntries_MPISBAIJ(Mat A) 1261 { 1262 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1263 int ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1267 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1273 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1274 { 1275 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1276 Mat A = a->A,B = a->B; 1277 int ierr; 1278 PetscReal isend[5],irecv[5]; 1279 1280 PetscFunctionBegin; 1281 info->block_size = (PetscReal)a->bs; 1282 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1283 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1284 isend[3] = info->memory; isend[4] = info->mallocs; 1285 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1286 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1287 isend[3] += info->memory; isend[4] += info->mallocs; 1288 if (flag == MAT_LOCAL) { 1289 info->nz_used = isend[0]; 1290 info->nz_allocated = isend[1]; 1291 info->nz_unneeded = isend[2]; 1292 info->memory = isend[3]; 1293 info->mallocs = isend[4]; 1294 } else if (flag == MAT_GLOBAL_MAX) { 1295 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1296 info->nz_used = irecv[0]; 1297 info->nz_allocated = irecv[1]; 1298 info->nz_unneeded = irecv[2]; 1299 info->memory = irecv[3]; 1300 info->mallocs = irecv[4]; 1301 } else if (flag == MAT_GLOBAL_SUM) { 1302 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1303 info->nz_used = irecv[0]; 1304 info->nz_allocated = irecv[1]; 1305 info->nz_unneeded = irecv[2]; 1306 info->memory = irecv[3]; 1307 info->mallocs = irecv[4]; 1308 } else { 1309 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1310 } 1311 info->rows_global = (PetscReal)A->M; 1312 info->columns_global = (PetscReal)A->N; 1313 info->rows_local = (PetscReal)A->m; 1314 info->columns_local = (PetscReal)A->N; 1315 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1316 info->fill_ratio_needed = 0; 1317 info->factor_mallocs = 0; 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1323 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1324 { 1325 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1326 int ierr; 1327 1328 PetscFunctionBegin; 1329 switch (op) { 1330 case MAT_NO_NEW_NONZERO_LOCATIONS: 1331 case MAT_YES_NEW_NONZERO_LOCATIONS: 1332 case MAT_COLUMNS_UNSORTED: 1333 case MAT_COLUMNS_SORTED: 1334 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1335 case MAT_KEEP_ZEROED_ROWS: 1336 case MAT_NEW_NONZERO_LOCATION_ERR: 1337 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1338 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1339 break; 1340 case MAT_ROW_ORIENTED: 1341 a->roworiented = PETSC_TRUE; 1342 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1343 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1344 break; 1345 case MAT_ROWS_SORTED: 1346 case MAT_ROWS_UNSORTED: 1347 case MAT_YES_NEW_DIAGONALS: 1348 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1349 break; 1350 case MAT_COLUMN_ORIENTED: 1351 a->roworiented = PETSC_FALSE; 1352 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1353 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1354 break; 1355 case MAT_IGNORE_OFF_PROC_ENTRIES: 1356 a->donotstash = PETSC_TRUE; 1357 break; 1358 case MAT_NO_NEW_DIAGONALS: 1359 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1360 case MAT_USE_HASH_TABLE: 1361 a->ht_flag = PETSC_TRUE; 1362 break; 1363 default: 1364 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1371 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1372 { 1373 int ierr; 1374 PetscFunctionBegin; 1375 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1381 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1382 { 1383 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1384 Mat a = baij->A,b = baij->B; 1385 int ierr,s1,s2,s3; 1386 1387 PetscFunctionBegin; 1388 if (ll != rr) { 1389 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1390 } 1391 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1392 if (rr) { 1393 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1394 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1395 /* Overlap communication with computation. */ 1396 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1397 /*} if (ll) { */ 1398 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1399 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1400 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1401 /* } */ 1402 /* scale the diagonal block */ 1403 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1404 1405 /* if (rr) { */ 1406 /* Do a scatter end and then right scale the off-diagonal block */ 1407 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1408 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1409 } 1410 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1416 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1417 { 1418 PetscFunctionBegin; 1419 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1424 int MatPrintHelp_MPISBAIJ(Mat A) 1425 { 1426 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1427 MPI_Comm comm = A->comm; 1428 static int called = 0; 1429 int ierr; 1430 1431 PetscFunctionBegin; 1432 if (!a->rank) { 1433 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1434 } 1435 if (called) {PetscFunctionReturn(0);} else called = 1; 1436 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1437 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1443 int MatSetUnfactored_MPISBAIJ(Mat A) 1444 { 1445 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1446 int ierr; 1447 1448 PetscFunctionBegin; 1449 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1450 PetscFunctionReturn(0); 1451 } 1452 1453 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatEqual_MPISBAIJ" 1457 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1458 { 1459 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1460 Mat a,b,c,d; 1461 PetscTruth flg; 1462 int ierr; 1463 1464 PetscFunctionBegin; 1465 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1466 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1467 a = matA->A; b = matA->B; 1468 c = matB->A; d = matB->B; 1469 1470 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1471 if (flg == PETSC_TRUE) { 1472 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1473 } 1474 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1480 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1481 { 1482 int ierr; 1483 1484 PetscFunctionBegin; 1485 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 /* -------------------------------------------------------------------*/ 1489 static struct _MatOps MatOps_Values = { 1490 MatSetValues_MPISBAIJ, 1491 MatGetRow_MPISBAIJ, 1492 MatRestoreRow_MPISBAIJ, 1493 MatMult_MPISBAIJ, 1494 MatMultAdd_MPISBAIJ, 1495 MatMultTranspose_MPISBAIJ, 1496 MatMultTransposeAdd_MPISBAIJ, 1497 0, 1498 0, 1499 0, 1500 0, 1501 0, 1502 0, 1503 MatRelax_MPISBAIJ, 1504 MatTranspose_MPISBAIJ, 1505 MatGetInfo_MPISBAIJ, 1506 MatEqual_MPISBAIJ, 1507 MatGetDiagonal_MPISBAIJ, 1508 MatDiagonalScale_MPISBAIJ, 1509 MatNorm_MPISBAIJ, 1510 MatAssemblyBegin_MPISBAIJ, 1511 MatAssemblyEnd_MPISBAIJ, 1512 0, 1513 MatSetOption_MPISBAIJ, 1514 MatZeroEntries_MPISBAIJ, 1515 MatZeroRows_MPISBAIJ, 1516 0, 1517 0, 1518 0, 1519 0, 1520 MatSetUpPreallocation_MPISBAIJ, 1521 0, 1522 0, 1523 0, 1524 0, 1525 MatDuplicate_MPISBAIJ, 1526 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 MatGetValues_MPISBAIJ, 1534 0, 1535 MatPrintHelp_MPISBAIJ, 1536 MatScale_MPISBAIJ, 1537 0, 1538 0, 1539 0, 1540 MatGetBlockSize_MPISBAIJ, 1541 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 MatSetUnfactored_MPISBAIJ, 1548 0, 1549 MatSetValuesBlocked_MPISBAIJ, 1550 0, 1551 0, 1552 0, 1553 MatGetPetscMaps_Petsc, 1554 0, 1555 0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 MatGetRowMax_MPISBAIJ}; 1561 1562 1563 EXTERN_C_BEGIN 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1566 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1567 { 1568 PetscFunctionBegin; 1569 *a = ((Mat_MPISBAIJ *)A->data)->A; 1570 *iscopy = PETSC_FALSE; 1571 PetscFunctionReturn(0); 1572 } 1573 EXTERN_C_END 1574 1575 EXTERN_C_BEGIN 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatCreate_MPISBAIJ" 1578 int MatCreate_MPISBAIJ(Mat B) 1579 { 1580 Mat_MPISBAIJ *b; 1581 int ierr; 1582 PetscTruth flg; 1583 1584 PetscFunctionBegin; 1585 1586 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1587 B->data = (void*)b; 1588 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1589 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1590 1591 B->ops->destroy = MatDestroy_MPISBAIJ; 1592 B->ops->view = MatView_MPISBAIJ; 1593 B->mapping = 0; 1594 B->factor = 0; 1595 B->assembled = PETSC_FALSE; 1596 1597 B->insertmode = NOT_SET_VALUES; 1598 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1599 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1600 1601 /* build local table of row and column ownerships */ 1602 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1603 b->cowners = b->rowners + b->size + 2; 1604 b->rowners_bs = b->cowners + b->size + 2; 1605 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1606 1607 /* build cache for off array entries formed */ 1608 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1609 b->donotstash = PETSC_FALSE; 1610 b->colmap = PETSC_NULL; 1611 b->garray = PETSC_NULL; 1612 b->roworiented = PETSC_TRUE; 1613 1614 #if defined(PETSC_USE_MAT_SINGLE) 1615 /* stuff for MatSetValues_XXX in single precision */ 1616 b->setvalueslen = 0; 1617 b->setvaluescopy = PETSC_NULL; 1618 #endif 1619 1620 /* stuff used in block assembly */ 1621 b->barray = 0; 1622 1623 /* stuff used for matrix vector multiply */ 1624 b->lvec = 0; 1625 b->Mvctx = 0; 1626 b->slvec0 = 0; 1627 b->slvec0b = 0; 1628 b->slvec1 = 0; 1629 b->slvec1a = 0; 1630 b->slvec1b = 0; 1631 b->sMvctx = 0; 1632 1633 /* stuff for MatGetRow() */ 1634 b->rowindices = 0; 1635 b->rowvalues = 0; 1636 b->getrowactive = PETSC_FALSE; 1637 1638 /* hash table stuff */ 1639 b->ht = 0; 1640 b->hd = 0; 1641 b->ht_size = 0; 1642 b->ht_flag = PETSC_FALSE; 1643 b->ht_fact = 0; 1644 b->ht_total_ct = 0; 1645 b->ht_insert_ct = 0; 1646 1647 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1648 if (flg) { 1649 PetscReal fact = 1.39; 1650 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1651 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1652 if (fact <= 1.0) fact = 1.39; 1653 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1654 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1655 } 1656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1657 "MatStoreValues_MPISBAIJ", 1658 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1660 "MatRetrieveValues_MPISBAIJ", 1661 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1663 "MatGetDiagonalBlock_MPISBAIJ", 1664 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 EXTERN_C_END 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1671 /*@C 1672 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1673 the user should preallocate the matrix storage by setting the parameters 1674 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1675 performance can be increased by more than a factor of 50. 1676 1677 Collective on Mat 1678 1679 Input Parameters: 1680 + A - the matrix 1681 . bs - size of blockk 1682 . d_nz - number of block nonzeros per block row in diagonal portion of local 1683 submatrix (same for all local rows) 1684 . d_nnz - array containing the number of block nonzeros in the various block rows 1685 in the upper triangular and diagonal part of the in diagonal portion of the local 1686 (possibly different for each block row) or PETSC_NULL. You must leave room 1687 for the diagonal entry even if it is zero. 1688 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1689 submatrix (same for all local rows). 1690 - o_nnz - array containing the number of nonzeros in the various block rows of the 1691 off-diagonal portion of the local submatrix (possibly different for 1692 each block row) or PETSC_NULL. 1693 1694 1695 Options Database Keys: 1696 . -mat_no_unroll - uses code that does not unroll the loops in the 1697 block calculations (much slower) 1698 . -mat_block_size - size of the blocks to use 1699 1700 Notes: 1701 1702 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1703 than it must be used on all processors that share the object for that argument. 1704 1705 Storage Information: 1706 For a square global matrix we define each processor's diagonal portion 1707 to be its local rows and the corresponding columns (a square submatrix); 1708 each processor's off-diagonal portion encompasses the remainder of the 1709 local matrix (a rectangular submatrix). 1710 1711 The user can specify preallocated storage for the diagonal part of 1712 the local submatrix with either d_nz or d_nnz (not both). Set 1713 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1714 memory allocation. Likewise, specify preallocated storage for the 1715 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1716 1717 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1718 the figure below we depict these three local rows and all columns (0-11). 1719 1720 .vb 1721 0 1 2 3 4 5 6 7 8 9 10 11 1722 ------------------- 1723 row 3 | o o o d d d o o o o o o 1724 row 4 | o o o d d d o o o o o o 1725 row 5 | o o o d d d o o o o o o 1726 ------------------- 1727 .ve 1728 1729 Thus, any entries in the d locations are stored in the d (diagonal) 1730 submatrix, and any entries in the o locations are stored in the 1731 o (off-diagonal) submatrix. Note that the d matrix is stored in 1732 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1733 1734 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1735 plus the diagonal part of the d matrix, 1736 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1737 In general, for PDE problems in which most nonzeros are near the diagonal, 1738 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1739 or you will get TERRIBLE performance; see the users' manual chapter on 1740 matrices. 1741 1742 Level: intermediate 1743 1744 .keywords: matrix, block, aij, compressed row, sparse, parallel 1745 1746 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1747 @*/ 1748 1749 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1750 { 1751 Mat_MPISBAIJ *b; 1752 int ierr,i,mbs,Mbs; 1753 PetscTruth flg2; 1754 1755 PetscFunctionBegin; 1756 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr); 1757 if (!flg2) PetscFunctionReturn(0); 1758 1759 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1760 1761 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1762 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1763 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1764 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1765 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1766 if (d_nnz) { 1767 for (i=0; i<B->m/bs; i++) { 1768 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1769 } 1770 } 1771 if (o_nnz) { 1772 for (i=0; i<B->m/bs; i++) { 1773 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1774 } 1775 } 1776 B->preallocated = PETSC_TRUE; 1777 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1778 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1779 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1780 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1781 1782 b = (Mat_MPISBAIJ*)B->data; 1783 mbs = B->m/bs; 1784 Mbs = B->M/bs; 1785 if (mbs*bs != B->m) { 1786 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1787 } 1788 1789 b->bs = bs; 1790 b->bs2 = bs*bs; 1791 b->mbs = mbs; 1792 b->nbs = mbs; 1793 b->Mbs = Mbs; 1794 b->Nbs = Mbs; 1795 1796 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1797 b->rowners[0] = 0; 1798 for (i=2; i<=b->size; i++) { 1799 b->rowners[i] += b->rowners[i-1]; 1800 } 1801 b->rstart = b->rowners[b->rank]; 1802 b->rend = b->rowners[b->rank+1]; 1803 b->cstart = b->rstart; 1804 b->cend = b->rend; 1805 for (i=0; i<=b->size; i++) { 1806 b->rowners_bs[i] = b->rowners[i]*bs; 1807 } 1808 b->rstart_bs = b-> rstart*bs; 1809 b->rend_bs = b->rend*bs; 1810 1811 b->cstart_bs = b->cstart*bs; 1812 b->cend_bs = b->cend*bs; 1813 1814 1815 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1816 PetscLogObjectParent(B,b->A); 1817 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1818 PetscLogObjectParent(B,b->B); 1819 1820 /* build cache for off array entries formed */ 1821 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1822 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatCreateMPISBAIJ" 1828 /*@C 1829 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1830 (block compressed row). For good matrix assembly performance 1831 the user should preallocate the matrix storage by setting the parameters 1832 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1833 performance can be increased by more than a factor of 50. 1834 1835 Collective on MPI_Comm 1836 1837 Input Parameters: 1838 + comm - MPI communicator 1839 . bs - size of blockk 1840 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1841 This value should be the same as the local size used in creating the 1842 y vector for the matrix-vector product y = Ax. 1843 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1844 This value should be the same as the local size used in creating the 1845 x vector for the matrix-vector product y = Ax. 1846 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1847 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1848 . d_nz - number of block nonzeros per block row in diagonal portion of local 1849 submatrix (same for all local rows) 1850 . d_nnz - array containing the number of block nonzeros in the various block rows 1851 in the upper triangular portion of the in diagonal portion of the local 1852 (possibly different for each block block row) or PETSC_NULL. 1853 You must leave room for the diagonal entry even if it is zero. 1854 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1855 submatrix (same for all local rows). 1856 - o_nnz - array containing the number of nonzeros in the various block rows of the 1857 off-diagonal portion of the local submatrix (possibly different for 1858 each block row) or PETSC_NULL. 1859 1860 Output Parameter: 1861 . A - the matrix 1862 1863 Options Database Keys: 1864 . -mat_no_unroll - uses code that does not unroll the loops in the 1865 block calculations (much slower) 1866 . -mat_block_size - size of the blocks to use 1867 . -mat_mpi - use the parallel matrix data structures even on one processor 1868 (defaults to using SeqBAIJ format on one processor) 1869 1870 Notes: 1871 The user MUST specify either the local or global matrix dimensions 1872 (possibly both). 1873 1874 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1875 than it must be used on all processors that share the object for that argument. 1876 1877 Storage Information: 1878 For a square global matrix we define each processor's diagonal portion 1879 to be its local rows and the corresponding columns (a square submatrix); 1880 each processor's off-diagonal portion encompasses the remainder of the 1881 local matrix (a rectangular submatrix). 1882 1883 The user can specify preallocated storage for the diagonal part of 1884 the local submatrix with either d_nz or d_nnz (not both). Set 1885 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1886 memory allocation. Likewise, specify preallocated storage for the 1887 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1888 1889 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1890 the figure below we depict these three local rows and all columns (0-11). 1891 1892 .vb 1893 0 1 2 3 4 5 6 7 8 9 10 11 1894 ------------------- 1895 row 3 | o o o d d d o o o o o o 1896 row 4 | o o o d d d o o o o o o 1897 row 5 | o o o d d d o o o o o o 1898 ------------------- 1899 .ve 1900 1901 Thus, any entries in the d locations are stored in the d (diagonal) 1902 submatrix, and any entries in the o locations are stored in the 1903 o (off-diagonal) submatrix. Note that the d matrix is stored in 1904 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1905 1906 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1907 plus the diagonal part of the d matrix, 1908 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1909 In general, for PDE problems in which most nonzeros are near the diagonal, 1910 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1911 or you will get TERRIBLE performance; see the users' manual chapter on 1912 matrices. 1913 1914 Level: intermediate 1915 1916 .keywords: matrix, block, aij, compressed row, sparse, parallel 1917 1918 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1919 @*/ 1920 1921 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1922 { 1923 int ierr,size; 1924 1925 PetscFunctionBegin; 1926 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1927 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1928 if (size > 1) { 1929 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1930 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1931 } else { 1932 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1933 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1934 } 1935 PetscFunctionReturn(0); 1936 } 1937 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1941 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1942 { 1943 Mat mat; 1944 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1945 int ierr,len=0; 1946 1947 PetscFunctionBegin; 1948 *newmat = 0; 1949 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1950 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1951 mat->preallocated = PETSC_TRUE; 1952 a = (Mat_MPISBAIJ*)mat->data; 1953 a->bs = oldmat->bs; 1954 a->bs2 = oldmat->bs2; 1955 a->mbs = oldmat->mbs; 1956 a->nbs = oldmat->nbs; 1957 a->Mbs = oldmat->Mbs; 1958 a->Nbs = oldmat->Nbs; 1959 1960 a->rstart = oldmat->rstart; 1961 a->rend = oldmat->rend; 1962 a->cstart = oldmat->cstart; 1963 a->cend = oldmat->cend; 1964 a->size = oldmat->size; 1965 a->rank = oldmat->rank; 1966 a->donotstash = oldmat->donotstash; 1967 a->roworiented = oldmat->roworiented; 1968 a->rowindices = 0; 1969 a->rowvalues = 0; 1970 a->getrowactive = PETSC_FALSE; 1971 a->barray = 0; 1972 a->rstart_bs = oldmat->rstart_bs; 1973 a->rend_bs = oldmat->rend_bs; 1974 a->cstart_bs = oldmat->cstart_bs; 1975 a->cend_bs = oldmat->cend_bs; 1976 1977 /* hash table stuff */ 1978 a->ht = 0; 1979 a->hd = 0; 1980 a->ht_size = 0; 1981 a->ht_flag = oldmat->ht_flag; 1982 a->ht_fact = oldmat->ht_fact; 1983 a->ht_total_ct = 0; 1984 a->ht_insert_ct = 0; 1985 1986 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1987 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1988 a->cowners = a->rowners + a->size + 2; 1989 a->rowners_bs = a->cowners + a->size + 2; 1990 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1991 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1992 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1993 if (oldmat->colmap) { 1994 #if defined (PETSC_USE_CTABLE) 1995 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1996 #else 1997 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1998 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1999 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2000 #endif 2001 } else a->colmap = 0; 2002 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2003 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2004 PetscLogObjectMemory(mat,len*sizeof(int)); 2005 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2006 } else a->garray = 0; 2007 2008 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2009 PetscLogObjectParent(mat,a->lvec); 2010 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2011 2012 PetscLogObjectParent(mat,a->Mvctx); 2013 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2014 PetscLogObjectParent(mat,a->A); 2015 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2016 PetscLogObjectParent(mat,a->B); 2017 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2018 *newmat = mat; 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #include "petscsys.h" 2023 2024 EXTERN_C_BEGIN 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "MatLoad_MPISBAIJ" 2027 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2028 { 2029 Mat A; 2030 int i,nz,ierr,j,rstart,rend,fd; 2031 PetscScalar *vals,*buf; 2032 MPI_Comm comm = ((PetscObject)viewer)->comm; 2033 MPI_Status status; 2034 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2035 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2036 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2037 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2038 int dcount,kmax,k,nzcount,tmp; 2039 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 2040 PetscTruth flag; 2041 #endif 2042 2043 PetscFunctionBegin; 2044 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2045 2046 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2047 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2048 if (!rank) { 2049 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2050 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2051 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2052 if (header[3] < 0) { 2053 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2054 } 2055 } 2056 2057 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2058 M = header[1]; N = header[2]; 2059 2060 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2061 2062 /* 2063 This code adds extra rows to make sure the number of rows is 2064 divisible by the blocksize 2065 */ 2066 Mbs = M/bs; 2067 extra_rows = bs - M + bs*(Mbs); 2068 if (extra_rows == bs) extra_rows = 0; 2069 else Mbs++; 2070 if (extra_rows &&!rank) { 2071 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2072 } 2073 2074 /* determine ownership of all rows */ 2075 mbs = Mbs/size + ((Mbs % size) > rank); 2076 m = mbs*bs; 2077 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2078 browners = rowners + size + 1; 2079 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2080 rowners[0] = 0; 2081 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2082 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2083 rstart = rowners[rank]; 2084 rend = rowners[rank+1]; 2085 2086 /* distribute row lengths to all processors */ 2087 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2088 if (!rank) { 2089 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2090 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2091 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2092 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2093 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2094 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2095 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2096 } else { 2097 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2098 } 2099 2100 if (!rank) { /* procs[0] */ 2101 /* calculate the number of nonzeros on each processor */ 2102 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2103 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2104 for (i=0; i<size; i++) { 2105 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2106 procsnz[i] += rowlengths[j]; 2107 } 2108 } 2109 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2110 2111 /* determine max buffer needed and allocate it */ 2112 maxnz = 0; 2113 for (i=0; i<size; i++) { 2114 maxnz = PetscMax(maxnz,procsnz[i]); 2115 } 2116 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2117 2118 /* read in my part of the matrix column indices */ 2119 nz = procsnz[0]; 2120 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2121 mycols = ibuf; 2122 if (size == 1) nz -= extra_rows; 2123 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2124 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2125 2126 /* read in every ones (except the last) and ship off */ 2127 for (i=1; i<size-1; i++) { 2128 nz = procsnz[i]; 2129 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2130 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2131 } 2132 /* read in the stuff for the last proc */ 2133 if (size != 1) { 2134 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2135 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2136 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2137 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2138 } 2139 ierr = PetscFree(cols);CHKERRQ(ierr); 2140 } else { /* procs[i], i>0 */ 2141 /* determine buffer space needed for message */ 2142 nz = 0; 2143 for (i=0; i<m; i++) { 2144 nz += locrowlens[i]; 2145 } 2146 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2147 mycols = ibuf; 2148 /* receive message of column indices*/ 2149 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2150 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2151 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2152 } 2153 2154 /* loop over local rows, determining number of off diagonal entries */ 2155 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2156 odlens = dlens + (rend-rstart); 2157 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2158 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2159 masked1 = mask + Mbs; 2160 masked2 = masked1 + Mbs; 2161 rowcount = 0; nzcount = 0; 2162 for (i=0; i<mbs; i++) { 2163 dcount = 0; 2164 odcount = 0; 2165 for (j=0; j<bs; j++) { 2166 kmax = locrowlens[rowcount]; 2167 for (k=0; k<kmax; k++) { 2168 tmp = mycols[nzcount++]/bs; /* block col. index */ 2169 if (!mask[tmp]) { 2170 mask[tmp] = 1; 2171 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2172 else masked1[dcount++] = tmp; /* entry in diag portion */ 2173 } 2174 } 2175 rowcount++; 2176 } 2177 2178 dlens[i] = dcount; /* d_nzz[i] */ 2179 odlens[i] = odcount; /* o_nzz[i] */ 2180 2181 /* zero out the mask elements we set */ 2182 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2183 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2184 } 2185 2186 /* create our matrix */ 2187 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2188 CHKERRQ(ierr); 2189 A = *newmat; 2190 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2191 2192 if (!rank) { 2193 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2194 /* read in my part of the matrix numerical values */ 2195 nz = procsnz[0]; 2196 vals = buf; 2197 mycols = ibuf; 2198 if (size == 1) nz -= extra_rows; 2199 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2200 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2201 2202 /* insert into matrix */ 2203 jj = rstart*bs; 2204 for (i=0; i<m; i++) { 2205 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2206 mycols += locrowlens[i]; 2207 vals += locrowlens[i]; 2208 jj++; 2209 } 2210 2211 /* read in other processors (except the last one) and ship out */ 2212 for (i=1; i<size-1; i++) { 2213 nz = procsnz[i]; 2214 vals = buf; 2215 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2216 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2217 } 2218 /* the last proc */ 2219 if (size != 1){ 2220 nz = procsnz[i] - extra_rows; 2221 vals = buf; 2222 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2223 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2224 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2225 } 2226 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2227 2228 } else { 2229 /* receive numeric values */ 2230 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2231 2232 /* receive message of values*/ 2233 vals = buf; 2234 mycols = ibuf; 2235 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2236 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2237 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2238 2239 /* insert into matrix */ 2240 jj = rstart*bs; 2241 for (i=0; i<m; i++) { 2242 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2243 mycols += locrowlens[i]; 2244 vals += locrowlens[i]; 2245 jj++; 2246 } 2247 } 2248 2249 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2250 ierr = PetscFree(buf);CHKERRQ(ierr); 2251 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2252 ierr = PetscFree(rowners);CHKERRQ(ierr); 2253 ierr = PetscFree(dlens);CHKERRQ(ierr); 2254 ierr = PetscFree(mask);CHKERRQ(ierr); 2255 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2256 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2257 #if defined(PETSC_HAVE_SPOOLES) 2258 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 2259 if (flag) { 2260 if (size == 1) { 2261 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2262 } else { 2263 ierr = MatUseSpooles_MPISBAIJ(A);CHKERRQ(ierr); 2264 } 2265 } 2266 #endif 2267 #if defined(PETSC_HAVE_MUMPS) 2268 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 2269 if (flag) { 2270 ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); 2271 } 2272 #endif 2273 PetscFunctionReturn(0); 2274 } 2275 EXTERN_C_END 2276 2277 #undef __FUNCT__ 2278 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2279 /*@ 2280 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2281 2282 Input Parameters: 2283 . mat - the matrix 2284 . fact - factor 2285 2286 Collective on Mat 2287 2288 Level: advanced 2289 2290 Notes: 2291 This can also be set by the command line option: -mat_use_hash_table fact 2292 2293 .keywords: matrix, hashtable, factor, HT 2294 2295 .seealso: MatSetOption() 2296 @*/ 2297 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2298 { 2299 PetscFunctionBegin; 2300 SETERRQ(1,"Function not yet written for SBAIJ format"); 2301 /* PetscFunctionReturn(0); */ 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2306 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2307 { 2308 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2309 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2310 PetscReal atmp; 2311 PetscReal *work,*svalues,*rvalues; 2312 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2313 int rank,size,*rowners_bs,dest,count,source; 2314 PetscScalar *va; 2315 MatScalar *ba; 2316 MPI_Status stat; 2317 2318 PetscFunctionBegin; 2319 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2320 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2321 2322 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2323 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2324 2325 bs = a->bs; 2326 mbs = a->mbs; 2327 Mbs = a->Mbs; 2328 ba = b->a; 2329 bi = b->i; 2330 bj = b->j; 2331 /* 2332 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2333 PetscSynchronizedFlush(A->comm); 2334 */ 2335 2336 /* find ownerships */ 2337 rowners_bs = a->rowners_bs; 2338 /* 2339 if (!rank){ 2340 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2341 } 2342 */ 2343 2344 /* each proc creates an array to be distributed */ 2345 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2346 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2347 2348 /* row_max for B */ 2349 if (rank != size-1){ 2350 for (i=0; i<mbs; i++) { 2351 ncols = bi[1] - bi[0]; bi++; 2352 brow = bs*i; 2353 for (j=0; j<ncols; j++){ 2354 bcol = bs*(*bj); 2355 for (kcol=0; kcol<bs; kcol++){ 2356 col = bcol + kcol; /* local col index */ 2357 col += rowners_bs[rank+1]; /* global col index */ 2358 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2359 for (krow=0; krow<bs; krow++){ 2360 atmp = PetscAbsScalar(*ba); ba++; 2361 row = brow + krow; /* local row index */ 2362 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2363 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2364 if (work[col] < atmp) work[col] = atmp; 2365 } 2366 } 2367 bj++; 2368 } 2369 } 2370 /* 2371 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2372 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2373 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2374 */ 2375 2376 /* send values to its owners */ 2377 for (dest=rank+1; dest<size; dest++){ 2378 svalues = work + rowners_bs[dest]; 2379 count = rowners_bs[dest+1]-rowners_bs[dest]; 2380 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2381 /* 2382 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2383 PetscSynchronizedFlush(A->comm); 2384 */ 2385 } 2386 } 2387 2388 /* receive values */ 2389 if (rank){ 2390 rvalues = work; 2391 count = rowners_bs[rank+1]-rowners_bs[rank]; 2392 for (source=0; source<rank; source++){ 2393 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2394 /* process values */ 2395 for (i=0; i<count; i++){ 2396 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2397 } 2398 /* 2399 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2400 PetscSynchronizedFlush(A->comm); 2401 */ 2402 } 2403 } 2404 2405 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2406 ierr = PetscFree(work);CHKERRQ(ierr); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatRelax_MPISBAIJ" 2412 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2413 { 2414 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2415 int ierr,mbs=mat->mbs,bs=mat->bs; 2416 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2417 Vec bb1; 2418 2419 PetscFunctionBegin; 2420 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2421 if (bs > 1) 2422 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2423 2424 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2425 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2426 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2427 its--; 2428 } 2429 2430 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2431 while (its--){ 2432 2433 /* lower triangular part: slvec0b = - B^T*xx */ 2434 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2435 2436 /* copy xx into slvec0a */ 2437 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2438 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2439 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2440 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2441 2442 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2443 2444 /* copy bb into slvec1a */ 2445 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2446 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2447 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2448 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2449 2450 /* set slvec1b = 0 */ 2451 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2452 2453 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2454 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2455 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2456 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2457 2458 /* upper triangular part: bb1 = bb1 - B*x */ 2459 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2460 2461 /* local diagonal sweep */ 2462 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2463 } 2464 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2465 } else { 2466 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2467 } 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2473 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2474 { 2475 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2476 int ierr; 2477 PetscScalar mone=-1.0; 2478 Vec lvec1,bb1; 2479 2480 PetscFunctionBegin; 2481 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2482 if (mat->bs > 1) 2483 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2484 2485 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2486 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2487 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2488 its--; 2489 } 2490 2491 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2492 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2493 while (its--){ 2494 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2495 2496 /* lower diagonal part: bb1 = bb - B^T*xx */ 2497 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2498 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2499 2500 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2501 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2502 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2503 2504 /* upper diagonal part: bb1 = bb1 - B*x */ 2505 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2507 2508 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2509 2510 /* diagonal sweep */ 2511 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2512 } 2513 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2514 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2515 } else { 2516 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2517 } 2518 PetscFunctionReturn(0); 2519 } 2520 2521