1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatPrintHelp_SeqSBAIJ(Mat); 17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does 24 not exist; the ..._MatScalar() names below are #defined to the ordinary routines. Otherwise 25 ..._MatScalar() takes matrix elements that are already in single precision (MatScalar) and 26 inserts them into the single precision data structure, while MatSetValuesBlocked_MPISBAIJ() 27 converts the (PetscScalar) entries into single precision and then calls ..._MatScalar() to put them 28 into the single precision data structures. */ 29 #if defined(PETSC_USE_MAT_SINGLE) 30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 #else 36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 37 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 39 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 41 #endif 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 46 int MatStoreValues_MPISBAIJ(Mat mat) 47 { 48 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 49 int ierr; 50 51 PetscFunctionBegin; 52 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 53 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 61 int MatRetrieveValues_MPISBAIJ(Mat mat) 62 { 63 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 64 int ierr; 65 66 PetscFunctionBegin; 67 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 68 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* 74 Local utility routine that creates a mapping from the global column 75 number to the local number in the off-diagonal part of the local 76 storage of the matrix. This is done in a non-scalable way since the 77 length of colmap equals the global matrix length.
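   A sketch of the assumed convention (it matches the colmap lookups in
   MatSetValues_MPISBAIJ_MatScalar() and MatSetValuesBlocked_MPISBAIJ_MatScalar()
   below): colmap is indexed by the global block column gbcol and stores
   bs*(local block column in B) + 1, with 0 meaning that gbcol does not occur
   in the local B, so callers test col = colmap[gbcol] - 1 < 0 for absence.
   For example, with bs = 2 and B holding global block columns 2 and 7 locally,
   colmap[2] = 1 and colmap[7] = 3, while all other entries are 0. (With
   PETSC_USE_CTABLE the same values are kept in a PetscTable keyed by gbcol+1
   instead of a dense array.)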
78 */ 79 #undef __FUNCT__ 80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqSBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = 
bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 
} 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 271 { 272 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 280 b->setvalueslen = N; 281 } 282 vsingle = b->setvaluescopy; 283 for (i=0; i<N; i++) { 284 vsingle[i] = v[i]; 285 } 286 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 293 { 294 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 295 int ierr,i,N = m*n; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 306 { 307 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 308 int ierr,i,N = m*n*b->bs2; 309 MatScalar *vsingle; 310 311 PetscFunctionBegin; 312 SETERRQ(1,"Function not yet written for SBAIJ format"); 313 /* PetscFunctionReturn(0); */ 314 } 315 #endif 316 317 /* Only add/insert a(i,j) with i<=j (blocks). 318 Any a(i,j) with i>j input by user is ignored. 
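   For example (illustrative indices): with bs = 2, the scalar entry (3,1)
   falls in block (1,0), which is below the block diagonal and is therefore
   dropped, while (1,3) falls in block (0,1) and is kept. Scalar entries below
   the diagonal of a diagonal block, e.g. (3,2) in block (1,1), are kept as
   well because the full block is stored.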
319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 355 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1; 386 #else 387 col = baij->colmap[in[j]/bs] - 1; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } else col += in[j]%bs; 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 MatScalar *value,*barray=baij->barray; 435 PetscTruth roworiented = baij->roworiented; 436 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 437 int rend=baij->rend,cstart=baij->cstart,stepval; 438 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 439 440 PetscFunctionBegin; 441 if(!barray) { 442 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 443 baij->barray = barray; 444 } 445 446 if (roworiented) { 447 stepval = (n-1)*bs; 448 } else { 449 stepval = (m-1)*bs; 450 } 451 for (i=0; i<m; i++) { 452 if (im[i] < 0) continue; 453 #if defined(PETSC_USE_BOPT_g) 454 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 455 #endif 456 if (im[i] >= rstart && im[i] < rend) { 457 row = im[i] - rstart; 458 for (j=0; j<n; j++) { 459 /* If NumCol = 1 then a copy is not required */ 460 if ((roworiented) && (n == 1)) { 461 barray = v + i*bs2; 462 } else if((!roworiented) && (m == 1)) { 463 barray = v + j*bs2; 464 } else { /* Here a copy is required */ 465 if (roworiented) { 466 value = v + i*(stepval+bs)*bs + j*bs; 467 } else { 468 value = v + j*(stepval+bs)*bs + i*bs; 469 } 470 for (ii=0; ii<bs; ii++,value+=stepval) { 471 for (jj=0; jj<bs; jj++) { 472 *barray++ = *value++; 473 } 474 } 475 barray -=bs2; 476 } 477 478 if (in[j] >= cstart && in[j] < cend){ 479 col = in[j] - cstart; 480 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 481 } 482 else if (in[j] < 0) continue; 483 #if defined(PETSC_USE_BOPT_g) 484 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 485 #endif 486 else { 487 if (mat->was_assembled) { 488 if (!baij->colmap) { 489 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 490 } 491 492 #if defined(PETSC_USE_BOPT_g) 493 #if defined (PETSC_USE_CTABLE) 494 { int data; 495 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 496 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 497 } 498 #else 499 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 500 #endif 501 #endif 502 #if defined (PETSC_USE_CTABLE) 503 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 504 col = (col - 1)/bs; 505 #else 506 col = (baij->colmap[in[j]] - 1)/bs; 507 #endif 508 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 509 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 510 col = in[j]; 511 } 512 } 513 else col = in[j]; 514 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 515 } 516 } 517 } else { 518 if (!baij->donotstash) { 519 if (roworiented) { 520 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 521 } else { 522 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 523 } 524 } 525 } 526 } 527 PetscFunctionReturn(0); 528 } 529 530 #define HASH_KEY 0.6180339887 531 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 532 /* 
#define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 533 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 536 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 537 { 538 PetscFunctionBegin; 539 SETERRQ(1,"Function not yet written for SBAIJ format"); 540 /* PetscFunctionReturn(0); */ 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 545 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 546 { 547 PetscFunctionBegin; 548 SETERRQ(1,"Function not yet written for SBAIJ format"); 549 /* PetscFunctionReturn(0); */ 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatGetValues_MPISBAIJ" 554 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 555 { 556 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 557 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 558 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 559 560 PetscFunctionBegin; 561 for (i=0; i<m; i++) { 562 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 563 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 564 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 565 row = idxm[i] - bsrstart; 566 for (j=0; j<n; j++) { 567 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 568 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 569 if (idxn[j] >= bscstart && idxn[j] < bscend){ 570 col = idxn[j] - bscstart; 571 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 572 } else { 573 if (!baij->colmap) { 574 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 575 } 576 #if defined (PETSC_USE_CTABLE) 577 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 578 data--; 579 #else 580 data = baij->colmap[idxn[j]/bs]-1; 581 #endif 582 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 583 else { 584 col = data + idxn[j]%bs; 585 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 586 } 587 } 588 } 589 } else { 590 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatNorm_MPISBAIJ" 598 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 599 { 600 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 601 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 602 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 603 int ierr; 604 PetscReal sum[2],*lnorm2; 605 606 PetscFunctionBegin; 607 if (baij->size == 1) { 608 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 609 } else { 610 if (type == NORM_FROBENIUS) { 611 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 612 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 613 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */ 614 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 615 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */ 616 /* 617 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 618 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 619 */ 620 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 621 /* 622 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 623 PetscSynchronizedFlush(mat->comm); */ 624 /* each off-diagonal block is stored only once but, by symmetry, contributes twice to the square of the Frobenius norm */ 625 *norm = sqrt(sum[0] + 2*sum[1]); 626 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 627 } else { 628 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 /* 635 Creates and fills the hash table. 636 This table is created only once. 637 If new entries need to be added to the matrix 638 then the hash table has to be destroyed and 639 recreated. 640 */ 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 643 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 644 { 645 PetscFunctionBegin; 646 SETERRQ(1,"Function not yet written for SBAIJ format"); 647 /* PetscFunctionReturn(0); */ 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 652 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 655 int ierr,nstash,reallocs; 656 InsertMode addv; 657 658 PetscFunctionBegin; 659 if (baij->donotstash) { 660 PetscFunctionReturn(0); 661 } 662 663 /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */ 664 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 665 if (addv == (ADD_VALUES|INSERT_VALUES)) { 666 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 667 } 668 mat->insertmode = addv; /* in case this processor had no cache */ 669 670 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 671 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 672 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 673 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 674 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 675 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 681 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 684 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 685 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 686 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 687 int *row,*col,other_disassembled; 688 PetscTruth r1,r2,r3; 689 MatScalar *val; 690 InsertMode addv = mat->insertmode; 691 #if defined(PETSC_HAVE_SPOOLES) 692 PetscTruth flag; 693 #endif 694 695 PetscFunctionBegin; 696 697 if (!baij->donotstash) { 698 while (1) { 699 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 700 /* 701 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 702 PetscSynchronizedFlush(mat->comm); 703 */ 704 if (!flg) break; 705 706 for (i=0; i<n;) { 707 /* Now identify the consecutive vals belonging to the same row */ 708 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 713 i = j; 714 } 715 } 716 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 717 /* Now process the block-stash. Since the values are stashed column-oriented, 718 set the roworiented flag to column oriented, and after MatSetValues() 719 restore the original flags */ 720 r1 = baij->roworiented; 721 r2 = a->roworiented; 722 r3 = b->roworiented; 723 baij->roworiented = PETSC_FALSE; 724 a->roworiented = PETSC_FALSE; 725 b->roworiented = PETSC_FALSE; 726 while (1) { 727 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 728 if (!flg) break; 729 730 for (i=0; i<n;) { 731 /* Now identify the consecutive vals belonging to the same row */ 732 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 733 if (j < n) ncols = j-i; 734 else ncols = n-i; 735 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 736 i = j; 737 } 738 } 739 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 740 baij->roworiented = r1; 741 a->roworiented = r2; 742 b->roworiented = r3; 743 } 744 745 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 746 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 747 748 /* determine if any processor has disassembled, if so we must 749 also disassemble ourselves, in order that we may reassemble. */ 750 /* 751 if nonzero structure of submatrix B cannot change then we know that 752 no processor disassembled thus we can skip this stuff 753 */ 754 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 755 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 756 if (mat->was_assembled && !other_disassembled) { 757 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 758 } 759 } 760 761 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 762 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 763 } 764 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 765 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 766 767 #if defined(PETSC_USE_BOPT_g) 768 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 769 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 770 baij->ht_total_ct = 0; 771 baij->ht_insert_ct = 0; 772 } 773 #endif 774 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 775 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 776 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 777 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 778 } 779 780 if (baij->rowvalues) { 781 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 782 baij->rowvalues = 0; 783 } 784 785 #if defined(PETSC_HAVE_SPOOLES) 786 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 787 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 788 #endif 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 794 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 795 { 796 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 797 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 798 PetscTruth isascii,isdraw; 799 PetscViewer sviewer; 800 PetscViewerFormat format; 801 802 PetscFunctionBegin; 803 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 804 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 805 if (isascii) { 806 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 807 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 808 MatInfo info; 809 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 810 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 811 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 812 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 813 baij->bs,(int)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } else if (format == PETSC_VIEWER_ASCII_INFO) { 822 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 } 826 827 if (isdraw) { 828 PetscDraw draw; 829 PetscTruth isnull; 830 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 831 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 832 } 833 834 if (size == 1) { 835 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 836 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 837 } else { 838 /* assemble the entire matrix onto first processor. */ 839 Mat A; 840 Mat_SeqSBAIJ *Aloc; 841 Mat_SeqBAIJ *Bloc; 842 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 843 MatScalar *a; 844 845 if (!rank) { 846 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 847 } else { 848 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 849 } 850 PetscLogObjectParent(mat,A); 851 852 /* copy over the A part */ 853 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 854 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 855 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 856 857 for (i=0; i<mbs; i++) { 858 rvals[0] = bs*(baij->rstart + i); 859 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 860 for (j=ai[i]; j<ai[i+1]; j++) { 861 col = (baij->cstart+aj[j])*bs; 862 for (k=0; k<bs; k++) { 863 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 864 col++; a += bs; 865 } 866 } 867 } 868 /* copy over the B part */ 869 Bloc = (Mat_SeqBAIJ*)baij->B->data; 870 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 871 for (i=0; i<mbs; i++) { 872 rvals[0] = bs*(baij->rstart + i); 873 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 874 for (j=ai[i]; j<ai[i+1]; j++) { 875 col = baij->garray[aj[j]]*bs; 876 for (k=0; k<bs; k++) { 877 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 878 col++; a += bs; 879 } 880 } 881 } 882 ierr = PetscFree(rvals);CHKERRQ(ierr); 883 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 885 /* 886 Everyone has to call to draw the matrix since the graphics waits are 887 synchronized across all processors that share the PetscDraw object 888 */ 889 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 890 if (!rank) { 891 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 892 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 893 } 
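/* Note (assumed semantics of the singleton-viewer API): PetscViewerGetSingleton()/PetscViewerRestoreSingleton() are collective on the viewer, so the restore below must be executed by every process, including those that viewed nothing above. */ 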
894 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 895 ierr = MatDestroy(A);CHKERRQ(ierr); 896 } 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatView_MPISBAIJ" 902 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 903 { 904 int ierr; 905 PetscTruth isascii,isdraw,issocket,isbinary; 906 907 PetscFunctionBegin; 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 909 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 911 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 912 if (isascii || isdraw || issocket || isbinary) { 913 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 914 } else { 915 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 916 } 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatDestroy_MPISBAIJ" 922 int MatDestroy_MPISBAIJ(Mat mat) 923 { 924 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 925 int ierr; 926 927 PetscFunctionBegin; 928 #if defined(PETSC_USE_LOG) 929 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 930 #endif 931 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 932 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 933 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 934 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 935 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 936 #if defined (PETSC_USE_CTABLE) 937 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 938 #else 939 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 940 #endif 941 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 942 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 943 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 944 if (baij->slvec0) { 945 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 946 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 947 } 948 if (baij->slvec1) { 949 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 950 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 951 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 952 } 953 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 954 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 955 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 956 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 957 #if defined(PETSC_USE_MAT_SINGLE) 958 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 959 #endif 960 ierr = PetscFree(baij);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 /* y = A*x for the symmetric half storage: the diagonal block is applied directly; the off-diagonal blocks B (stored only for the upper triangle) supply both the superdiagonal product B*x_ghost and the subdiagonal product B^T*x_local, and the single scatter sMvctx both brings in the ghost values of x and accumulates the remote B^T contributions. */ 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatMult_MPISBAIJ" 966 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 967 { 968 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 969 int ierr,nt,mbs=a->mbs,bs=a->bs; 970 PetscScalar *x,*from,zero=0.0; 971 972 PetscFunctionBegin; 973 /* 974 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 975 PetscSynchronizedFlush(A->comm); 976 */ 977 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 978 if (nt != A->n) { 979 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 980 } 981 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 982 if (nt != A->m) { 983 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy"); 984 } 985 986 /* diagonal part */ 987 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 988 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 989 990 /* subdiagonal part */ 991 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 992 993 /* copy x into the vec slvec0 */ 994 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 995 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 996 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 997 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 998 999 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1000 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1002 1003 /* superdiagonal part */ 1004 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1005 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1011 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1014 int ierr,nt; 1015 1016 PetscFunctionBegin; 1017 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1018 if (nt != A->n) { 1019 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1020 } 1021 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1022 if (nt != A->m) { 1023 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy"); 1024 } 1025 1026 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1027 /* do diagonal part */ 1028 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1029 /* do superdiagonal part */ 1030 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1031 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1032 /* do subdiagonal part */ 1033 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1034 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1036 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1042 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1043 { 1044 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1045 int ierr,mbs=a->mbs,bs=a->bs; 1046 PetscScalar *x,*from,zero=0.0; 1047 1048 PetscFunctionBegin; 1049 /* 1050 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1051 PetscSynchronizedFlush(A->comm); 1052 */ 1053 /* diagonal part */ 1054 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1055 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1056 1057 /* subdiagonal part */ 1058 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1059 1060 /* copy x into the vec slvec0 */ 1061 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1062 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1063 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1064 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1065 1066 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1067 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1069 1070 /* superdiagonal part */ 1071 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1072 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1078 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1079 { 1080 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1081 int ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1085 /* do diagonal part */ 1086 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1087 /* do superdiagonal part */ 1088 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1089 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1090 1091 /* do subdiagonal part */ 1092 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1093 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1094 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1095 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1101 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 PetscFunctionBegin; 1104 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1105 /* PetscFunctionReturn(0); */ 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1110 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 PetscFunctionBegin; 1113 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1114 /* PetscFunctionReturn(0); */ 1115 } 1116 1117 /* 1118 This only works correctly for square matrices where the subblock A->A is the 1119 diagonal block 1120 */ 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1123 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1124 { 1125 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1126 int ierr; 1127 1128 PetscFunctionBegin; 1129 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1130 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatScale_MPISBAIJ" 1136 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 1137 { 1138 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1143 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1149 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1150 { 1151 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1152 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1153 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1154 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1155 int *cmap,*idx_p,cstart = mat->cstart; 1156 1157 PetscFunctionBegin; 1158 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1159 mat->getrowactive = PETSC_TRUE; 1160 1161 if (!mat->rowvalues && (idx || v)) { 1162 /* 1163 allocate enough space to hold information from the longest row. 
1164 */ 1165 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1166 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1167 int max = 1,mbs = mat->mbs,tmp; 1168 for (i=0; i<mbs; i++) { 1169 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1170 if (max < tmp) { max = tmp; } 1171 } 1172 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1173 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1174 } 1175 1176 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1177 lrow = row - brstart; /* local row index */ 1178 1179 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1180 if (!v) {pvA = 0; pvB = 0;} 1181 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1182 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1183 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1184 nztot = nzA + nzB; 1185 1186 cmap = mat->garray; 1187 if (v || idx) { 1188 if (nztot) { 1189 /* Sort by increasing column numbers, assuming A and B already sorted */ 1190 int imark = -1; 1191 if (v) { 1192 *v = v_p = mat->rowvalues; 1193 for (i=0; i<nzB; i++) { 1194 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1195 else break; 1196 } 1197 imark = i; 1198 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1199 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1200 } 1201 if (idx) { 1202 *idx = idx_p = mat->rowindices; 1203 if (imark > -1) { 1204 for (i=0; i<imark; i++) { 1205 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1206 } 1207 } else { 1208 for (i=0; i<nzB; i++) { 1209 if (cmap[cworkB[i]/bs] < cstart) 1210 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1211 else break; 1212 } 1213 imark = i; 1214 } 1215 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1216 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1217 } 1218 } else { 1219 if (idx) *idx = 0; 1220 if (v) *v = 0; 1221 } 1222 } 1223 *nz = nztot; 1224 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1225 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1231 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1232 { 1233 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1234 1235 PetscFunctionBegin; 1236 if (baij->getrowactive == PETSC_FALSE) { 1237 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1238 } 1239 baij->getrowactive = PETSC_FALSE; 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1245 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1246 { 1247 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1248 1249 PetscFunctionBegin; 1250 *bs = baij->bs; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1256 int MatZeroEntries_MPISBAIJ(Mat A) 1257 { 1258 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1259 int ierr; 1260 1261 PetscFunctionBegin; 1262 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1263 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1269 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1270 { 1271 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1272 Mat A = a->A,B = a->B; 1273 int ierr; 1274 PetscReal isend[5],irecv[5]; 1275 1276 PetscFunctionBegin; 1277 info->block_size 
= (PetscReal)a->bs; 1278 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1279 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1280 isend[3] = info->memory; isend[4] = info->mallocs; 1281 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1282 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1283 isend[3] += info->memory; isend[4] += info->mallocs; 1284 if (flag == MAT_LOCAL) { 1285 info->nz_used = isend[0]; 1286 info->nz_allocated = isend[1]; 1287 info->nz_unneeded = isend[2]; 1288 info->memory = isend[3]; 1289 info->mallocs = isend[4]; 1290 } else if (flag == MAT_GLOBAL_MAX) { 1291 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1292 info->nz_used = irecv[0]; 1293 info->nz_allocated = irecv[1]; 1294 info->nz_unneeded = irecv[2]; 1295 info->memory = irecv[3]; 1296 info->mallocs = irecv[4]; 1297 } else if (flag == MAT_GLOBAL_SUM) { 1298 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1299 info->nz_used = irecv[0]; 1300 info->nz_allocated = irecv[1]; 1301 info->nz_unneeded = irecv[2]; 1302 info->memory = irecv[3]; 1303 info->mallocs = irecv[4]; 1304 } else { 1305 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1306 } 1307 info->rows_global = (PetscReal)matin->M; 1308 info->columns_global = (PetscReal)matin->N; 1309 info->rows_local = (PetscReal)matin->m; 1310 info->columns_local = (PetscReal)matin->n; 1311 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1312 info->fill_ratio_needed = 0; 1313 info->factor_mallocs = 0; 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1319 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1320 { 1321 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1322 int ierr; 1323 1324 PetscFunctionBegin; 1325 switch (op) { 1326 case MAT_NO_NEW_NONZERO_LOCATIONS: 1327 case MAT_YES_NEW_NONZERO_LOCATIONS: 1328 case MAT_COLUMNS_UNSORTED: 1329 case MAT_COLUMNS_SORTED: 1330 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1331 case MAT_KEEP_ZEROED_ROWS: 1332 case MAT_NEW_NONZERO_LOCATION_ERR: 1333 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1334 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1335 break; 1336 case MAT_ROW_ORIENTED: 1337 a->roworiented = PETSC_TRUE; 1338 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1339 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1340 break; 1341 case MAT_ROWS_SORTED: 1342 case MAT_ROWS_UNSORTED: 1343 case MAT_YES_NEW_DIAGONALS: 1344 PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"); 1345 break; 1346 case MAT_COLUMN_ORIENTED: 1347 a->roworiented = PETSC_FALSE; 1348 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1349 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1350 break; 1351 case MAT_IGNORE_OFF_PROC_ENTRIES: 1352 a->donotstash = PETSC_TRUE; 1353 break; 1354 case MAT_NO_NEW_DIAGONALS: 1355 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1356 case MAT_USE_HASH_TABLE: 1357 a->ht_flag = PETSC_TRUE; 1358 break; 1359 default: 1360 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1361 } 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1367 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1368 { 1369 int ierr; 1370 PetscFunctionBegin; /* the matrix is symmetric, so its transpose equals the matrix itself; return a copy */ 1371 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1377 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1378 { 1379 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 
1380 Mat a = baij->A,b = baij->B; 1381 int ierr,s1,s2,s3; 1382 1383 PetscFunctionBegin; 1384 if (ll != rr) { 1385 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1386 } 1387 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1388 if (rr) { 1389 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1390 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1391 /* Overlap communication with computation. */ 1392 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1393 /*} if (ll) { */ 1394 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1395 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1396 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1397 /* } */ 1398 /* scale the diagonal block */ 1399 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1400 1401 /* if (rr) { */ 1402 /* Do a scatter end and then right scale the off-diagonal block */ 1403 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1404 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1405 } 1406 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1412 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1413 { 1414 PetscFunctionBegin; 1415 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1420 int MatPrintHelp_MPISBAIJ(Mat A) 1421 { 1422 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1423 MPI_Comm comm = A->comm; 1424 static int called = 0; 1425 int ierr; 1426 1427 PetscFunctionBegin; 1428 if (!a->rank) { 1429 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1430 } 1431 if (called) {PetscFunctionReturn(0);} else called = 1; 1432 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1433 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1439 int MatSetUnfactored_MPISBAIJ(Mat A) 1440 { 1441 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1442 int ierr; 1443 1444 PetscFunctionBegin; 1445 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatEqual_MPISBAIJ" 1453 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1454 { 1455 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1456 Mat a,b,c,d; 1457 PetscTruth flg; 1458 int ierr; 1459 1460 PetscFunctionBegin; 1461 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1462 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1463 a = matA->A; b = matA->B; 1464 c = matB->A; d = matB->B; 1465 1466 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1467 if (flg == PETSC_TRUE) { 1468 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1469 } 1470 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1476 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1477 { 1478 int ierr; 1479 1480 PetscFunctionBegin; 1481 ierr = 
MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 /* -------------------------------------------------------------------*/ 1485 static struct _MatOps MatOps_Values = { 1486 MatSetValues_MPISBAIJ, 1487 MatGetRow_MPISBAIJ, 1488 MatRestoreRow_MPISBAIJ, 1489 MatMult_MPISBAIJ, 1490 MatMultAdd_MPISBAIJ, 1491 MatMultTranspose_MPISBAIJ, 1492 MatMultTransposeAdd_MPISBAIJ, 1493 0, 1494 0, 1495 0, 1496 0, 1497 0, 1498 0, 1499 MatRelax_MPISBAIJ, 1500 MatTranspose_MPISBAIJ, 1501 MatGetInfo_MPISBAIJ, 1502 MatEqual_MPISBAIJ, 1503 MatGetDiagonal_MPISBAIJ, 1504 MatDiagonalScale_MPISBAIJ, 1505 MatNorm_MPISBAIJ, 1506 MatAssemblyBegin_MPISBAIJ, 1507 MatAssemblyEnd_MPISBAIJ, 1508 0, 1509 MatSetOption_MPISBAIJ, 1510 MatZeroEntries_MPISBAIJ, 1511 MatZeroRows_MPISBAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 MatSetUpPreallocation_MPISBAIJ, 1517 0, 1518 0, 1519 0, 1520 0, 1521 MatDuplicate_MPISBAIJ, 1522 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 0, 1528 0, 1529 MatGetValues_MPISBAIJ, 1530 0, 1531 MatPrintHelp_MPISBAIJ, 1532 MatScale_MPISBAIJ, 1533 0, 1534 0, 1535 0, 1536 MatGetBlockSize_MPISBAIJ, 1537 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 MatSetUnfactored_MPISBAIJ, 1544 0, 1545 MatSetValuesBlocked_MPISBAIJ, 1546 0, 1547 0, 1548 0, 1549 MatGetPetscMaps_Petsc, 1550 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 MatGetRowMax_MPISBAIJ}; 1557 1558 1559 EXTERN_C_BEGIN 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1562 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1563 { 1564 PetscFunctionBegin; 1565 *a = ((Mat_MPISBAIJ *)A->data)->A; 1566 *iscopy = PETSC_FALSE; 1567 PetscFunctionReturn(0); 1568 } 1569 EXTERN_C_END 1570 1571 EXTERN_C_BEGIN 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatCreate_MPISBAIJ" 1574 int MatCreate_MPISBAIJ(Mat B) 1575 { 1576 Mat_MPISBAIJ *b; 1577 int ierr; 1578 PetscTruth flg; 1579 1580 PetscFunctionBegin; 1581 1582 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1583 B->data = (void*)b; 1584 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1585 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1586 1587 B->ops->destroy = MatDestroy_MPISBAIJ; 1588 B->ops->view = MatView_MPISBAIJ; 1589 B->mapping = 0; 1590 B->factor = 0; 1591 B->assembled = PETSC_FALSE; 1592 1593 B->insertmode = NOT_SET_VALUES; 1594 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1595 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1596 1597 /* build local table of row and column ownerships */ 1598 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1599 b->cowners = b->rowners + b->size + 2; 1600 b->rowners_bs = b->cowners + b->size + 2; 1601 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1602 1603 /* build cache for off array entries formed */ 1604 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1605 b->donotstash = PETSC_FALSE; 1606 b->colmap = PETSC_NULL; 1607 b->garray = PETSC_NULL; 1608 b->roworiented = PETSC_TRUE; 1609 1610 #if defined(PETSC_USE_MAT_SINGLE) 1611 /* stuff for MatSetValues_XXX in single precision */ 1612 b->setvalueslen = 0; 1613 b->setvaluescopy = PETSC_NULL; 1614 #endif 1615 1616 /* stuff used in block assembly */ 1617 b->barray = 0; 1618 1619 /* stuff used for matrix vector multiply */ 1620 b->lvec = 0; 1621 b->Mvctx = 0; 1622 b->slvec0 = 0; 1623 b->slvec0b = 0; 1624 b->slvec1 = 0; 1625 b->slvec1a = 0; 1626 b->slvec1b = 0; 1627 
b->sMvctx      = 0;
1628
1629   /* stuff for MatGetRow() */
1630   b->rowindices   = 0;
1631   b->rowvalues    = 0;
1632   b->getrowactive = PETSC_FALSE;
1633
1634   /* hash table stuff */
1635   b->ht           = 0;
1636   b->hd           = 0;
1637   b->ht_size      = 0;
1638   b->ht_flag      = PETSC_FALSE;
1639   b->ht_fact      = 0;
1640   b->ht_total_ct  = 0;
1641   b->ht_insert_ct = 0;
1642
1643   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1644   if (flg) {
1645     PetscReal fact = 1.39;
1646     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1647     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1648     if (fact <= 1.0) fact = 1.39;
1649     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1650     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1651   }
1652   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1653                                      "MatStoreValues_MPISBAIJ",
1654                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1656                                      "MatRetrieveValues_MPISBAIJ",
1657                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1659                                      "MatGetDiagonalBlock_MPISBAIJ",
1660                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1661   PetscFunctionReturn(0);
1662 }
1663 EXTERN_C_END
1664
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1667 /*@C
1668    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1669    the user should preallocate the matrix storage by setting the parameters
1670    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1671    performance can be increased by more than a factor of 50.
1672
1673    Collective on Mat
1674
1675    Input Parameters:
1676 +  A - the matrix
1677 .  bs - size of block
1678 .  d_nz - number of block nonzeros per block row in diagonal portion of local
1679            submatrix (same for all local rows)
1680 .  d_nnz - array containing the number of block nonzeros in the various block rows
1681            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1682            (possibly different for each block row) or PETSC_NULL. You must leave room
1683            for the diagonal entry even if it is zero.
1684 .  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1685           submatrix (same for all local rows).
1686 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1687            off-diagonal portion of the local submatrix (possibly different for
1688            each block row) or PETSC_NULL.
1689
1690
1691    Options Database Keys:
1692 .   -mat_no_unroll - uses code that does not unroll the loops in the
1693                      block calculations (much slower)
1694 .   -mat_block_size - size of the blocks to use
1695
1696    Notes:
1697
1698    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1699    then it must be used on all processors that share the object for that argument.
1700
1701    Storage Information:
1702    For a square global matrix we define each processor's diagonal portion
1703    to be its local rows and the corresponding columns (a square submatrix);
1704    each processor's off-diagonal portion encompasses the remainder of the
1705    local matrix (a rectangular submatrix).
1706
1707    The user can specify preallocated storage for the diagonal part of
1708    the local submatrix with either d_nz or d_nnz (not both). Set
1709    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1710    memory allocation.
Likewise, specify preallocated storage for the
1711    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1712
1713    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1714    the figure below we depict these three local rows and all columns (0-11).
1715
1716 .vb
1717            0 1 2 3 4 5 6 7 8 9 10 11
1718           -------------------
1719    row 3  | o o o d d d o o o o o o
1720    row 4  | o o o d d d o o o o o o
1721    row 5  | o o o d d d o o o o o o
1722           -------------------
1723 .ve
1724
1725    Thus, any entries in the d locations are stored in the d (diagonal)
1726    submatrix, and any entries in the o locations are stored in the
1727    o (off-diagonal) submatrix. Note that the d matrix is stored in
1728    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1729
1730    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1731    plus the diagonal part of the d matrix,
1732    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1733    In general, for PDE problems in which most nonzeros are near the diagonal,
1734    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1735    or you will get TERRIBLE performance; see the users' manual chapter on
1736    matrices.
1737
1738    Level: intermediate
1739
1740 .keywords: matrix, block, aij, compressed row, sparse, parallel
1741
1742 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1743 @*/
1744
1745 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1746 {
1747   Mat_MPISBAIJ *b;
1748   int          ierr,i,mbs,Mbs;
1749   PetscTruth   flg2;
1750
1751   PetscFunctionBegin;
1752   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1753   if (!flg2) PetscFunctionReturn(0);
1754
1755   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1756
1757   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1758   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1759   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1760   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1761   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1762   if (d_nnz) {
1763     for (i=0; i<B->m/bs; i++) {
1764       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1765     }
1766   }
1767   if (o_nnz) {
1768     for (i=0; i<B->m/bs; i++) {
1769       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1770     }
1771   }
1772   B->preallocated = PETSC_TRUE;
1773   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1774   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1775   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1776   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1777
1778   b   = (Mat_MPISBAIJ*)B->data;
1779   mbs = B->m/bs;
1780   Mbs = B->M/bs;
1781   if (mbs*bs != B->m) {
1782     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1783   }
1784
1785   b->bs  = bs;
1786   b->bs2 = bs*bs;
1787   b->mbs = mbs;
1788   b->nbs = mbs;
1789   b->Mbs = Mbs;
1790   b->Nbs = Mbs;
1791
1792   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1793   b->rowners[0] = 0;
1794   for (i=2; i<=b->size; i++) {
1795     b->rowners[i] += b->rowners[i-1];
1796   }
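  /* rowners[] now holds a prefix sum of the block-row counts: process p owns
     block rows rowners[p] through rowners[p+1]-1 */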
1797   b->rstart    = b->rowners[b->rank];
1798   b->rend      = b->rowners[b->rank+1];
1799   b->cstart    = b->rstart;
1800   b->cend      = b->rend;
1801   for (i=0; i<=b->size; i++) {
1802     b->rowners_bs[i] = b->rowners[i]*bs;
1803   }
1804   b->rstart_bs = b->rstart*bs;
1805   b->rend_bs   = b->rend*bs;
1806
1807   b->cstart_bs = b->cstart*bs;
1808   b->cend_bs   = b->cend*bs;
1809
1810
1811   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1812   PetscLogObjectParent(B,b->A);
1813   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1814   PetscLogObjectParent(B,b->B);
1815
1816   /* build cache for off array entries formed */
1817   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1818
1819   PetscFunctionReturn(0);
1820 }
1821
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatCreateMPISBAIJ"
1824 /*@C
1825    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1826    (block compressed row). For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830
1831    Collective on MPI_Comm
1832
1833    Input Parameters:
1834 +  comm - MPI communicator
1835 .  bs - size of block
1836 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1837           This value should be the same as the local size used in creating the
1838           y vector for the matrix-vector product y = Ax.
1839 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1840           This value should be the same as the local size used in creating the
1841           x vector for the matrix-vector product y = Ax.
1842 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1843 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1844 .  d_nz - number of block nonzeros per block row in diagonal portion of local
1845           submatrix (same for all local rows)
1846 .  d_nnz - array containing the number of block nonzeros in the various block rows
1847            in the upper triangular portion of the diagonal portion of the local submatrix
1848            (possibly different for each block row) or PETSC_NULL.
1849            You must leave room for the diagonal entry even if it is zero.
1850 .  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1851           submatrix (same for all local rows).
1852 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1853            off-diagonal portion of the local submatrix (possibly different for
1854            each block row) or PETSC_NULL.
1855
1856    Output Parameter:
1857 .  A - the matrix
1858
1859    Options Database Keys:
1860 .   -mat_no_unroll - uses code that does not unroll the loops in the
1861                      block calculations (much slower)
1862 .   -mat_block_size - size of the blocks to use
1863 .   -mat_mpi - use the parallel matrix data structures even on one processor
1864                (defaults to using SeqBAIJ format on one processor)
1865
1866    Notes:
1867    The user MUST specify either the local or global matrix dimensions
1868    (possibly both).
1869
1870    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1871    then it must be used on all processors that share the object for that argument.
1872
1873    Storage Information:
1874    For a square global matrix we define each processor's diagonal portion
1875    to be its local rows and the corresponding columns (a square submatrix);
1876    each processor's off-diagonal portion encompasses the remainder of the
1877    local matrix (a rectangular submatrix).
1878
1879    The user can specify preallocated storage for the diagonal part of
1880    the local submatrix with either d_nz or d_nnz (not both). Set
1881    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1882    memory allocation. Likewise, specify preallocated storage for the
1883    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1884
1885    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1886    the figure below we depict these three local rows and all columns (0-11).
1887
1888 .vb
1889            0 1 2 3 4 5 6 7 8 9 10 11
1890           -------------------
1891    row 3  | o o o d d d o o o o o o
1892    row 4  | o o o d d d o o o o o o
1893    row 5  | o o o d d d o o o o o o
1894           -------------------
1895 .ve
1896
1897    Thus, any entries in the d locations are stored in the d (diagonal)
1898    submatrix, and any entries in the o locations are stored in the
1899    o (off-diagonal) submatrix. Note that the d matrix is stored in
1900    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1901
1902    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1903    plus the diagonal part of the d matrix,
1904    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1905    In general, for PDE problems in which most nonzeros are near the diagonal,
1906    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1907    or you will get TERRIBLE performance; see the users' manual chapter on
1908    matrices.
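   Example: a minimal sketch (for illustration only) of creating and assembling
   a MATMPISBAIJ matrix with block size bs and constant preallocation, where M
   denotes the global dimension (assumed divisible by bs). For this symmetric
   format only entries in the upper triangular portion, including the diagonal,
   need be set:

.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,M,
                               3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... insert upper triangular entries with MatSetValues() or
             MatSetValuesBlocked() ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve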
1909 1910 Level: intermediate 1911 1912 .keywords: matrix, block, aij, compressed row, sparse, parallel 1913 1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1915 @*/ 1916 1917 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1918 { 1919 int ierr,size; 1920 1921 PetscFunctionBegin; 1922 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1923 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1924 if (size > 1) { 1925 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1926 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1927 } else { 1928 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1929 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1930 } 1931 PetscFunctionReturn(0); 1932 } 1933 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1937 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1938 { 1939 Mat mat; 1940 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1941 int ierr,len=0; 1942 1943 PetscFunctionBegin; 1944 *newmat = 0; 1945 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1946 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1947 mat->preallocated = PETSC_TRUE; 1948 a = (Mat_MPISBAIJ*)mat->data; 1949 a->bs = oldmat->bs; 1950 a->bs2 = oldmat->bs2; 1951 a->mbs = oldmat->mbs; 1952 a->nbs = oldmat->nbs; 1953 a->Mbs = oldmat->Mbs; 1954 a->Nbs = oldmat->Nbs; 1955 1956 a->rstart = oldmat->rstart; 1957 a->rend = oldmat->rend; 1958 a->cstart = oldmat->cstart; 1959 a->cend = oldmat->cend; 1960 a->size = oldmat->size; 1961 a->rank = oldmat->rank; 1962 a->donotstash = oldmat->donotstash; 1963 a->roworiented = oldmat->roworiented; 1964 a->rowindices = 0; 1965 a->rowvalues = 0; 1966 a->getrowactive = PETSC_FALSE; 1967 a->barray = 0; 1968 a->rstart_bs = oldmat->rstart_bs; 1969 a->rend_bs = oldmat->rend_bs; 1970 a->cstart_bs = oldmat->cstart_bs; 1971 a->cend_bs = oldmat->cend_bs; 1972 1973 /* hash table stuff */ 1974 a->ht = 0; 1975 a->hd = 0; 1976 a->ht_size = 0; 1977 a->ht_flag = oldmat->ht_flag; 1978 a->ht_fact = oldmat->ht_fact; 1979 a->ht_total_ct = 0; 1980 a->ht_insert_ct = 0; 1981 1982 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1983 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1984 a->cowners = a->rowners + a->size + 2; 1985 a->rowners_bs = a->cowners + a->size + 2; 1986 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1987 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1988 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1989 if (oldmat->colmap) { 1990 #if defined (PETSC_USE_CTABLE) 1991 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1992 #else 1993 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1994 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1995 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 1996 #endif 1997 } else a->colmap = 0; 1998 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1999 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2000 PetscLogObjectMemory(mat,len*sizeof(int)); 2001 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2002 } else a->garray = 0; 2003 2004 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2005   PetscLogObjectParent(mat,a->lvec);
2006   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2007
2008   PetscLogObjectParent(mat,a->Mvctx);
2009   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2010   PetscLogObjectParent(mat,a->A);
2011   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2012   PetscLogObjectParent(mat,a->B);
2013   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); /* copy the composed function list from the old matrix into the new one */
2014   *newmat = mat;
2015   PetscFunctionReturn(0);
2016 }
2017
2018 #include "petscsys.h"
2019
2020 EXTERN_C_BEGIN
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatLoad_MPISBAIJ"
2023 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2024 {
2025   Mat          A;
2026   int          i,nz,ierr,j,rstart,rend,fd;
2027   PetscScalar  *vals,*buf;
2028   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2029   MPI_Status   status;
2030   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2031   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2032   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2033   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2034   int          dcount,kmax,k,nzcount,tmp;
2035 #if defined(PETSC_HAVE_SPOOLES)
2036   PetscTruth   flag;
2037 #endif
2038
2039   PetscFunctionBegin;
2040   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2041
2042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2044   if (!rank) {
2045     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2046     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2047     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2048     if (header[3] < 0) {
2049       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2050     }
2051   }
2052
2053   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2054   M = header[1]; N = header[2];
2055
2056   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2057
2058   /*
2059      This code adds extra rows to make sure the number of rows is
2060      divisible by the blocksize
2061   */
2062   Mbs        = M/bs;
2063   extra_rows = bs - M + bs*(Mbs);
2064   if (extra_rows == bs) extra_rows = 0;
2065   else                  Mbs++;
2066   if (extra_rows && !rank) {
2067     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2068   }
2069
2070   /* determine ownership of all rows */
2071   mbs = Mbs/size + ((Mbs % size) > rank);
2072   m   = mbs*bs;
2073   ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2074   browners = rowners + size + 1;
2075   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2076   rowners[0] = 0;
2077   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2078   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2079   rstart = rowners[rank];
2080   rend   = rowners[rank+1];
2081
2082   /* distribute row lengths to all processors */
2083   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2084   if (!rank) {
2085     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2086     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2087     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2088     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2089     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2090     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2091     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2092   } else {
2093     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2094   }
2095
2096   if (!rank) { /* procs[0] */
2097     /* calculate the number of nonzeros on each processor */
2098     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2099     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2100     for (i=0; i<size; i++) {
2101       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2102         procsnz[i] += rowlengths[j];
2103       }
2104     }
2105     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2106
2107     /* determine max buffer needed and allocate it */
2108     maxnz = 0;
2109     for (i=0; i<size; i++) {
2110       maxnz = PetscMax(maxnz,procsnz[i]);
2111     }
2112     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2113
2114     /* read in my part of the matrix column indices */
2115     nz = procsnz[0];
2116     ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2117     mycols = ibuf;
2118     if (size == 1) nz -= extra_rows;
2119     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2120     if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2121
2122     /* read in the parts for all the other processors (except the last) and ship them off */
2123     for (i=1; i<size-1; i++) {
2124       nz   = procsnz[i];
2125       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2126       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2127     }
2128     /* read in the stuff for the last proc */
2129     if (size != 1) {
2130       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2131       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2132       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2133       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2134     }
2135     ierr = PetscFree(cols);CHKERRQ(ierr);
2136   } else { /* procs[i], i>0 */
2137     /* determine buffer space needed for message */
2138     nz = 0;
2139     for (i=0; i<m; i++) {
2140       nz += locrowlens[i];
2141     }
2142     ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2143     mycols = ibuf;
2144     /* receive message of column indices */
2145     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2146     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2147     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2148   }
2149
2150   /* loop over local rows, determining number of off diagonal entries */
2151   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2152   odlens  = dlens + (rend-rstart);
2153   ierr    = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2154   ierr    = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2155   masked1 = mask    + Mbs;
2156   masked2 = masked1 + Mbs;
2157   rowcount = 0; nzcount = 0;
2158   for (i=0; i<mbs; i++) {
2159     dcount  = 0;
2160     odcount = 0;
2161     for (j=0; j<bs; j++) {
2162       kmax = locrowlens[rowcount];
2163       for (k=0; k<kmax; k++) {
2164         tmp = mycols[nzcount++]/bs; /* block col.
index */ 2165 if (!mask[tmp]) { 2166 mask[tmp] = 1; 2167 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2168 else masked1[dcount++] = tmp; /* entry in diag portion */ 2169 } 2170 } 2171 rowcount++; 2172 } 2173 2174 dlens[i] = dcount; /* d_nzz[i] */ 2175 odlens[i] = odcount; /* o_nzz[i] */ 2176 2177 /* zero out the mask elements we set */ 2178 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2179 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2180 } 2181 2182 /* create our matrix */ 2183 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2184 CHKERRQ(ierr); 2185 A = *newmat; 2186 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2187 2188 if (!rank) { 2189 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2190 /* read in my part of the matrix numerical values */ 2191 nz = procsnz[0]; 2192 vals = buf; 2193 mycols = ibuf; 2194 if (size == 1) nz -= extra_rows; 2195 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2196 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2197 2198 /* insert into matrix */ 2199 jj = rstart*bs; 2200 for (i=0; i<m; i++) { 2201 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2202 mycols += locrowlens[i]; 2203 vals += locrowlens[i]; 2204 jj++; 2205 } 2206 2207 /* read in other processors (except the last one) and ship out */ 2208 for (i=1; i<size-1; i++) { 2209 nz = procsnz[i]; 2210 vals = buf; 2211 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2212 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2213 } 2214 /* the last proc */ 2215 if (size != 1){ 2216 nz = procsnz[i] - extra_rows; 2217 vals = buf; 2218 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2219 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2220 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2221 } 2222 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2223 2224 } else { 2225 /* receive numeric values */ 2226 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2227 2228 /* receive message of values*/ 2229 vals = buf; 2230 mycols = ibuf; 2231 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2232 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2233 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2234 2235 /* insert into matrix */ 2236 jj = rstart*bs; 2237 for (i=0; i<m; i++) { 2238 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2239 mycols += locrowlens[i]; 2240 vals += locrowlens[i]; 2241 jj++; 2242 } 2243 } 2244 2245 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2246 ierr = PetscFree(buf);CHKERRQ(ierr); 2247 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2248 ierr = PetscFree(rowners);CHKERRQ(ierr); 2249 ierr = PetscFree(dlens);CHKERRQ(ierr); 2250 ierr = PetscFree(mask);CHKERRQ(ierr); 2251 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2252 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2253 #if defined(PETSC_HAVE_SPOOLES) 2254 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 2255 if (flag) { 2256 if (size == 1) { 2257 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2258 } else { 2259 ierr = MatUseSpooles_MPISBAIJ(A);CHKERRQ(ierr); 2260 } 2261 } 2262 #endif 2263 PetscFunctionReturn(0); 2264 } 2265 EXTERN_C_END 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ 
"MatMPISBAIJSetHashTableFactor" 2269 /*@ 2270 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2271 2272 Input Parameters: 2273 . mat - the matrix 2274 . fact - factor 2275 2276 Collective on Mat 2277 2278 Level: advanced 2279 2280 Notes: 2281 This can also be set by the command line option: -mat_use_hash_table fact 2282 2283 .keywords: matrix, hashtable, factor, HT 2284 2285 .seealso: MatSetOption() 2286 @*/ 2287 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2288 { 2289 PetscFunctionBegin; 2290 SETERRQ(1,"Function not yet written for SBAIJ format"); 2291 /* PetscFunctionReturn(0); */ 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2296 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2297 { 2298 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2299 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2300 PetscReal atmp; 2301 PetscReal *work,*svalues,*rvalues; 2302 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2303 int rank,size,*rowners_bs,dest,count,source; 2304 PetscScalar *va; 2305 MatScalar *ba; 2306 MPI_Status stat; 2307 2308 PetscFunctionBegin; 2309 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2310 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2311 2312 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2313 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2314 2315 bs = a->bs; 2316 mbs = a->mbs; 2317 Mbs = a->Mbs; 2318 ba = b->a; 2319 bi = b->i; 2320 bj = b->j; 2321 /* 2322 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2323 PetscSynchronizedFlush(A->comm); 2324 */ 2325 2326 /* find ownerships */ 2327 rowners_bs = a->rowners_bs; 2328 /* 2329 if (!rank){ 2330 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2331 } 2332 */ 2333 2334 /* each proc creates an array to be distributed */ 2335 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2336 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2337 2338 /* row_max for B */ 2339 if (rank != size-1){ 2340 for (i=0; i<mbs; i++) { 2341 ncols = bi[1] - bi[0]; bi++; 2342 brow = bs*i; 2343 for (j=0; j<ncols; j++){ 2344 bcol = bs*(*bj); 2345 for (kcol=0; kcol<bs; kcol++){ 2346 col = bcol + kcol; /* local col index */ 2347 col += rowners_bs[rank+1]; /* global col index */ 2348 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2349 for (krow=0; krow<bs; krow++){ 2350 atmp = PetscAbsScalar(*ba); ba++; 2351 row = brow + krow; /* local row index */ 2352 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2353 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2354 if (work[col] < atmp) work[col] = atmp; 2355 } 2356 } 2357 bj++; 2358 } 2359 } 2360 /* 2361 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2362 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2363 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2364 */ 2365 2366 /* send values to its owners */ 2367 for (dest=rank+1; dest<size; dest++){ 2368 svalues = work + rowners_bs[dest]; 2369 count = rowners_bs[dest+1]-rowners_bs[dest]; 2370 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2371 /* 2372 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2373 PetscSynchronizedFlush(A->comm); 2374 */ 2375 } 2376 } 2377 2378 /* receive values */ 2379 if (rank){ 2380 rvalues = work; 2381 count = rowners_bs[rank+1]-rowners_bs[rank]; 2382 for (source=0; source<rank; 
source++){
2383       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2384       /* process values */
2385       for (i=0; i<count; i++){
2386         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2387       }
2388       /*
2389       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2390       PetscSynchronizedFlush(A->comm);
2391       */
2392     }
2393   }
2394
2395   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2396   ierr = PetscFree(work);CHKERRQ(ierr);
2397   PetscFunctionReturn(0);
2398 }
2399
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatRelax_MPISBAIJ"
2402 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2403 {
2404   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2405   int            ierr,mbs=mat->mbs,bs=mat->bs;
2406   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2407   Vec            bb1;
2408
2409   PetscFunctionBegin;
2410   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2411   if (bs > 1)
2412     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2413
2414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2415     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2416       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2417       its--;
2418     }
2419
2420     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2421     while (its--){
2422
2423       /* lower triangular part: slvec0b = - B^T*xx */
2424       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2425
2426       /* copy xx into slvec0a */
2427       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2428       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2429       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* vector entries are PetscScalar, not MatScalar */
2430       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2431
2432       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2433
2434       /* copy bb into slvec1a */
2435       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2436       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2437       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2438       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2439
2440       /* set slvec1b = 0 */
2441       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2442
2443       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2444       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2445       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2446       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2447
2448       /* upper triangular part: bb1 = bb1 - B*x */
2449       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2450
2451       /* local diagonal sweep */
2452       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2453     }
2454     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2455   } else {
2456     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2457   }
2458   PetscFunctionReturn(0);
2459 }
2460
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2463 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2464 {
2465   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2466   int            ierr;
2467   PetscScalar    mone=-1.0;
2468   Vec            lvec1,bb1;
2469
2470   PetscFunctionBegin;
2471   if (its <= 0 || lits <= 0)
SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2472 if (mat->bs > 1) 2473 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2474 2475 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2476 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2477 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2478 its--; 2479 } 2480 2481 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2482 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2483 while (its--){ 2484 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2485 2486 /* lower diagonal part: bb1 = bb - B^T*xx */ 2487 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2488 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2489 2490 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2491 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2492 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2493 2494 /* upper diagonal part: bb1 = bb1 - B*x */ 2495 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2496 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2497 2498 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2499 2500 /* diagonal sweep */ 2501 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2502 } 2503 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2504 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2505 } else { 2506 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2507 } 2508 PetscFunctionReturn(0); 2509 } 2510 2511
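/*
   Note on the two relaxation routines above: MatRelax_MPISBAIJ packs xx and bb
   into the combined work vectors slvec0/slvec1 so that the lower triangular
   contribution (B^T*xx) and the upper triangular contribution (B*x) are
   exchanged with a single forward scatter on sMvctx, whereas
   MatRelax_MPISBAIJ_2comm performs two scatters (a forward and a reverse)
   on the standard Mvctx context.
*/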