1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatPrintHelp_SeqSBAIJ(Mat); 17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 24 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 25 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 26 converts the entries into single precision and then calls ..._MatScalar() to put them 27 into the single precision data structures. 28 */ 29 #if defined(PETSC_USE_MAT_SINGLE) 30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 #else 36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 37 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 39 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 41 #endif 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 46 int MatStoreValues_MPISBAIJ(Mat mat) 47 { 48 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 49 int ierr; 50 51 PetscFunctionBegin; 52 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 53 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 61 int MatRetrieveValues_MPISBAIJ(Mat mat) 62 { 63 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 64 int ierr; 65 66 PetscFunctionBegin; 67 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 68 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* 74 Local utility routine that creates a mapping from the global column 75 number to the local number in the off-diagonal part of the local 76 storage of the matrix. This is done in a non scable way since the 77 length of colmap equals the global matrix length. 78 */ 79 #undef __FUNCT__ 80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 } 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 271 { 272 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 280 b->setvalueslen = N; 281 } 282 vsingle = b->setvaluescopy; 283 for (i=0; i<N; i++) { 284 vsingle[i] = v[i]; 285 } 286 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 306 { 307 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 308 int ierr,i,N = m*n*b->bs2; 309 MatScalar *vsingle; 310 311 PetscFunctionBegin; 312 SETERRQ(1,"Function not yet written for SBAIJ format"); 313 /* PetscFunctionReturn(0); */ 314 } 315 #endif 316 317 /* Only add/insert a(i,j) with i<=j (blocks). 318 Any a(i,j) with i>j input by user is ingored. 319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 355 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1; 386 #else 387 col = baij->colmap[in[j]/bs] - 1; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } else col += in[j]%bs; 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar" 431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 MatScalar *value,*barray=baij->barray; 435 PetscTruth roworiented = baij->roworiented; 436 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 437 int rend=baij->rend,cstart=baij->cstart,stepval; 438 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 439 440 PetscFunctionBegin; 441 if(!barray) { 442 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 443 baij->barray = barray; 444 } 445 446 if (roworiented) { 447 stepval = (n-1)*bs; 448 } else { 449 stepval = (m-1)*bs; 450 } 451 for (i=0; i<m; i++) { 452 if (im[i] < 0) continue; 453 #if defined(PETSC_USE_BOPT_g) 454 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 455 #endif 456 if (im[i] >= rstart && im[i] < rend) { 457 row = im[i] - rstart; 458 for (j=0; j<n; j++) { 459 /* If NumCol = 1 then a copy is not required */ 460 if ((roworiented) && (n == 1)) { 461 barray = v + i*bs2; 462 } else if((!roworiented) && (m == 1)) { 463 barray = v + j*bs2; 464 } else { /* Here a copy is required */ 465 if (roworiented) { 466 value = v + i*(stepval+bs)*bs + j*bs; 467 } else { 468 value = v + j*(stepval+bs)*bs + i*bs; 469 } 470 for (ii=0; ii<bs; ii++,value+=stepval) { 471 for (jj=0; jj<bs; jj++) { 472 *barray++ = *value++; 473 } 474 } 475 barray -=bs2; 476 } 477 478 if (in[j] >= cstart && in[j] < cend){ 479 col = in[j] - cstart; 480 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 481 } 482 else if (in[j] < 0) continue; 483 #if defined(PETSC_USE_BOPT_g) 484 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 485 #endif 486 else { 487 if (mat->was_assembled) { 488 if (!baij->colmap) { 489 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 490 } 491 492 #if defined(PETSC_USE_BOPT_g) 493 #if defined (PETSC_USE_CTABLE) 494 { int data; 495 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 496 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 497 } 498 #else 499 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 500 #endif 501 #endif 502 #if defined (PETSC_USE_CTABLE) 503 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 504 col = (col - 1)/bs; 505 #else 506 col = (baij->colmap[in[j]] - 1)/bs; 507 #endif 508 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 509 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 510 col = in[j]; 511 } 512 } 513 else col = in[j]; 514 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 515 } 516 } 517 } else { 518 if (!baij->donotstash) { 519 if (roworiented) { 520 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 521 } else { 522 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 523 } 524 } 525 } 526 } 527 PetscFunctionReturn(0); 528 } 529 530 #define HASH_KEY 0.6180339887 531 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 532 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 533 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 536 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 537 { 538 PetscFunctionBegin; 539 SETERRQ(1,"Function not yet written for SBAIJ format"); 540 /* PetscFunctionReturn(0); */ 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 545 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 546 { 547 PetscFunctionBegin; 548 SETERRQ(1,"Function not yet written for SBAIJ format"); 549 /* PetscFunctionReturn(0); */ 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatGetValues_MPISBAIJ" 554 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 555 { 556 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 557 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 558 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 559 560 PetscFunctionBegin; 561 for (i=0; i<m; i++) { 562 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 563 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 564 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 565 row = idxm[i] - bsrstart; 566 for (j=0; j<n; j++) { 567 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 568 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 569 if (idxn[j] >= bscstart && idxn[j] < bscend){ 570 col = idxn[j] - bscstart; 571 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 572 } else { 573 if (!baij->colmap) { 574 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 575 } 576 #if defined (PETSC_USE_CTABLE) 577 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 578 data --; 579 #else 580 data = baij->colmap[idxn[j]/bs]-1; 581 #endif 582 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 583 else { 584 col = data + idxn[j]%bs; 585 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 586 } 587 } 588 } 589 } else { 590 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatNorm_MPISBAIJ" 598 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 599 { 600 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 601 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 602 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 603 int ierr; 604 PetscReal sum[2],*lnorm2; 605 606 PetscFunctionBegin; 607 if (baij->size == 1) { 608 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 609 } else { 610 if (type == NORM_FROBENIUS) { 611 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 612 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 613 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 614 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 615 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 616 /* 617 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 618 PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 619 */ 620 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 621 /* 622 PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 623 PetscSynchronizedFlush(mat->comm); */ 624 625 *norm = sqrt(sum[0] + 2*sum[1]); 626 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 627 } else { 628 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 /* 635 Creates the hash table, and sets the table 636 This table is created only once. 637 If new entried need to be added to the matrix 638 then the hash table has to be destroyed and 639 recreated. 640 */ 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 643 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 644 { 645 PetscFunctionBegin; 646 SETERRQ(1,"Function not yet written for SBAIJ format"); 647 /* PetscFunctionReturn(0); */ 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 652 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 655 int ierr,nstash,reallocs; 656 InsertMode addv; 657 658 PetscFunctionBegin; 659 if (baij->donotstash) { 660 PetscFunctionReturn(0); 661 } 662 663 /* make sure all processors are either in INSERTMODE or ADDMODE */ 664 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 665 if (addv == (ADD_VALUES|INSERT_VALUES)) { 666 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 667 } 668 mat->insertmode = addv; /* in case this processor had no cache */ 669 670 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 671 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 672 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 673 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 674 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 675 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 681 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 684 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 685 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 686 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 687 int *row,*col,other_disassembled; 688 PetscTruth r1,r2,r3; 689 MatScalar *val; 690 InsertMode addv = mat->insertmode; 691 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 692 PetscTruth flag; 693 #endif 694 695 PetscFunctionBegin; 696 697 if (!baij->donotstash) { 698 while (1) { 699 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 700 /* 701 PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 702 PetscSynchronizedFlush(mat->comm); 703 */ 704 if (!flg) break; 705 706 for (i=0; i<n;) { 707 /* Now identify the consecutive vals belonging to the same row */ 708 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 713 i = j; 714 } 715 } 716 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 717 /* Now process the block-stash. Since the values are stashed column-oriented, 718 set the roworiented flag to column oriented, and after MatSetValues() 719 restore the original flags */ 720 r1 = baij->roworiented; 721 r2 = a->roworiented; 722 r3 = b->roworiented; 723 baij->roworiented = PETSC_FALSE; 724 a->roworiented = PETSC_FALSE; 725 b->roworiented = PETSC_FALSE; 726 while (1) { 727 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 728 if (!flg) break; 729 730 for (i=0; i<n;) { 731 /* Now identify the consecutive vals belonging to the same row */ 732 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 733 if (j < n) ncols = j-i; 734 else ncols = n-i; 735 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 736 i = j; 737 } 738 } 739 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 740 baij->roworiented = r1; 741 a->roworiented = r2; 742 b->roworiented = r3; 743 } 744 745 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 746 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 747 748 /* determine if any processor has disassembled, if so we must 749 also disassemble ourselfs, in order that we may reassemble. */ 750 /* 751 if nonzero structure of submatrix B cannot change then we know that 752 no processor disassembled thus we can skip this stuff 753 */ 754 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 755 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 756 if (mat->was_assembled && !other_disassembled) { 757 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 758 } 759 } 760 761 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 762 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 763 } 764 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 765 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 766 767 #if defined(PETSC_USE_BOPT_g) 768 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 769 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 770 baij->ht_total_ct = 0; 771 baij->ht_insert_ct = 0; 772 } 773 #endif 774 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 775 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 776 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 777 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 778 } 779 780 if (baij->rowvalues) { 781 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 782 baij->rowvalues = 0; 783 } 784 785 #if defined(PETSC_HAVE_SPOOLES) 786 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 787 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 788 #endif 789 #if defined(PETSC_HAVE_MUMPS) 790 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 791 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 792 #endif 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 798 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 799 { 800 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 801 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 802 PetscTruth isascii,isdraw; 803 PetscViewer sviewer; 804 PetscViewerFormat format; 805 806 PetscFunctionBegin; 807 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 808 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 809 if (isascii) { 810 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 811 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 812 MatInfo info; 813 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 814 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 816 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 817 baij->bs,(int)info.memory);CHKERRQ(ierr); 818 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 820 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 821 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 822 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 823 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } else if (format == PETSC_VIEWER_ASCII_INFO) { 826 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscTruth isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 836 } 837 838 if (size == 1) { 839 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 840 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 841 } else { 842 /* assemble the entire matrix onto first processor. */ 843 Mat A; 844 Mat_SeqSBAIJ *Aloc; 845 Mat_SeqBAIJ *Bloc; 846 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 847 MatScalar *a; 848 849 if (!rank) { 850 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 851 } else { 852 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 853 } 854 PetscLogObjectParent(mat,A); 855 856 /* copy over the A part */ 857 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 858 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 859 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 860 861 for (i=0; i<mbs; i++) { 862 rvals[0] = bs*(baij->rstart + i); 863 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 864 for (j=ai[i]; j<ai[i+1]; j++) { 865 col = (baij->cstart+aj[j])*bs; 866 for (k=0; k<bs; k++) { 867 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 868 col++; a += bs; 869 } 870 } 871 } 872 /* copy over the B part */ 873 Bloc = (Mat_SeqBAIJ*)baij->B->data; 874 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 875 for (i=0; i<mbs; i++) { 876 rvals[0] = bs*(baij->rstart + i); 877 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 878 for (j=ai[i]; j<ai[i+1]; j++) { 879 col = baij->garray[aj[j]]*bs; 880 for (k=0; k<bs; k++) { 881 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 882 col++; a += bs; 883 } 884 } 885 } 886 ierr = PetscFree(rvals);CHKERRQ(ierr); 887 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 889 /* 890 Everyone has to call to draw the matrix since the graphics waits are 891 synchronized across all processors that share the PetscDraw object 892 */ 893 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 894 if (!rank) { 895 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 896 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 897 } 898 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 899 ierr = MatDestroy(A);CHKERRQ(ierr); 900 } 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatView_MPISBAIJ" 906 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 907 { 908 int ierr; 909 PetscTruth isascii,isdraw,issocket,isbinary; 910 911 PetscFunctionBegin; 912 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 913 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 914 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 915 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 916 if (isascii || isdraw || issocket || isbinary) { 917 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 918 } else { 919 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 920 } 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "MatDestroy_MPISBAIJ" 926 int MatDestroy_MPISBAIJ(Mat mat) 927 { 928 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 929 int ierr; 930 931 PetscFunctionBegin; 932 #if defined(PETSC_USE_LOG) 933 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 934 #endif 935 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 936 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 937 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 938 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 939 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 940 #if defined (PETSC_USE_CTABLE) 941 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 942 #else 943 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 944 #endif 945 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 946 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 947 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 948 if (baij->slvec0) { 949 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 950 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 951 } 952 if (baij->slvec1) { 953 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 954 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 955 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 956 } 957 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 958 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 959 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 960 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 961 #if defined(PETSC_USE_MAT_SINGLE) 962 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 963 #endif 964 ierr = PetscFree(baij);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatMult_MPISBAIJ" 970 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 971 { 972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 973 int ierr,nt,mbs=a->mbs,bs=a->bs; 974 PetscScalar *x,*from,zero=0.0; 975 976 PetscFunctionBegin; 977 /* 978 PetscSynchronizedPrintf(A->comm," _1comm is called ...\n"); 979 PetscSynchronizedFlush(A->comm); 980 */ 981 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 982 if (nt != A->n) { 983 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 984 } 985 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 986 if (nt != A->m) { 987 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 988 } 989 990 /* diagonal part */ 991 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 992 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 993 994 /* subdiagonal part */ 995 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 996 997 /* copy x into the vec slvec0 */ 998 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 999 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1000 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1001 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1002 1003 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1006 1007 /* supperdiagonal part */ 1008 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1009 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1015 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1016 { 1017 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1018 int ierr,nt; 1019 1020 PetscFunctionBegin; 1021 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1022 if (nt != A->n) { 1023 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1024 } 1025 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1026 if (nt != A->m) { 1027 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1028 } 1029 1030 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1031 /* do diagonal part */ 1032 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1033 /* do supperdiagonal part */ 1034 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1035 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1036 /* do subdiagonal part */ 1037 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1038 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1039 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1040 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1046 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1047 { 1048 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1049 int ierr,mbs=a->mbs,bs=a->bs; 1050 PetscScalar *x,*from,zero=0.0; 1051 1052 PetscFunctionBegin; 1053 /* 1054 PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n"); 1055 PetscSynchronizedFlush(A->comm); 1056 */ 1057 /* diagonal part */ 1058 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1059 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1060 1061 /* subdiagonal part */ 1062 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1063 1064 /* copy x into the vec slvec0 */ 1065 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1066 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1067 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1068 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1069 1070 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1072 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1073 1074 /* supperdiagonal part */ 1075 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1076 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1082 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1083 { 1084 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1085 int ierr; 1086 1087 PetscFunctionBegin; 1088 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1089 /* do diagonal part */ 1090 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1091 /* do supperdiagonal part */ 1092 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1093 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1094 1095 /* do subdiagonal part */ 1096 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1097 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1098 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1099 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1105 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1106 { 1107 PetscFunctionBegin; 1108 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1109 /* PetscFunctionReturn(0); */ 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1114 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1115 { 1116 PetscFunctionBegin; 1117 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1118 /* PetscFunctionReturn(0); */ 1119 } 1120 1121 /* 1122 This only works correctly for square matrices where the subblock A->A is the 1123 diagonal block 1124 */ 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1127 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1128 { 1129 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1130 int ierr; 1131 1132 PetscFunctionBegin; 1133 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1134 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatScale_MPISBAIJ" 1140 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 1141 { 1142 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1143 int ierr; 1144 1145 PetscFunctionBegin; 1146 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1147 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1153 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1154 { 1155 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1156 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1157 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1158 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1159 int *cmap,*idx_p,cstart = mat->cstart; 1160 1161 PetscFunctionBegin; 1162 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1163 mat->getrowactive = PETSC_TRUE; 1164 1165 if (!mat->rowvalues && (idx || v)) { 1166 /* 1167 allocate enough space to hold information from the longest row. 1168 */ 1169 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1170 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1171 int max = 1,mbs = mat->mbs,tmp; 1172 for (i=0; i<mbs; i++) { 1173 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1174 if (max < tmp) { max = tmp; } 1175 } 1176 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1177 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1178 } 1179 1180 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1181 lrow = row - brstart; /* local row index */ 1182 1183 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1184 if (!v) {pvA = 0; pvB = 0;} 1185 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1186 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1187 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1188 nztot = nzA + nzB; 1189 1190 cmap = mat->garray; 1191 if (v || idx) { 1192 if (nztot) { 1193 /* Sort by increasing column numbers, assuming A and B already sorted */ 1194 int imark = -1; 1195 if (v) { 1196 *v = v_p = mat->rowvalues; 1197 for (i=0; i<nzB; i++) { 1198 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1199 else break; 1200 } 1201 imark = i; 1202 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1203 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1204 } 1205 if (idx) { 1206 *idx = idx_p = mat->rowindices; 1207 if (imark > -1) { 1208 for (i=0; i<imark; i++) { 1209 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1210 } 1211 } else { 1212 for (i=0; i<nzB; i++) { 1213 if (cmap[cworkB[i]/bs] < cstart) 1214 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1215 else break; 1216 } 1217 imark = i; 1218 } 1219 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1220 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1221 } 1222 } else { 1223 if (idx) *idx = 0; 1224 if (v) *v = 0; 1225 } 1226 } 1227 *nz = nztot; 1228 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1229 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1235 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1236 { 1237 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1238 1239 PetscFunctionBegin; 1240 if (baij->getrowactive == PETSC_FALSE) { 1241 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1242 } 1243 baij->getrowactive = PETSC_FALSE; 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1249 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1250 { 1251 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1252 1253 PetscFunctionBegin; 1254 *bs = baij->bs; 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1260 int MatZeroEntries_MPISBAIJ(Mat A) 1261 { 1262 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1263 int ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1267 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1273 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1274 { 1275 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1276 Mat A = a->A,B = a->B; 1277 int ierr; 1278 PetscReal isend[5],irecv[5]; 1279 1280 PetscFunctionBegin; 1281 info->block_size = (PetscReal)a->bs; 1282 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1283 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1284 isend[3] = info->memory; isend[4] = info->mallocs; 1285 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1286 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1287 isend[3] += info->memory; isend[4] += info->mallocs; 1288 if (flag == MAT_LOCAL) { 1289 info->nz_used = isend[0]; 1290 info->nz_allocated = isend[1]; 1291 info->nz_unneeded = isend[2]; 1292 info->memory = isend[3]; 1293 info->mallocs = isend[4]; 1294 } else if (flag == MAT_GLOBAL_MAX) { 1295 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1296 info->nz_used = irecv[0]; 1297 info->nz_allocated = irecv[1]; 1298 info->nz_unneeded = irecv[2]; 1299 info->memory = irecv[3]; 1300 info->mallocs = irecv[4]; 1301 } else if (flag == MAT_GLOBAL_SUM) { 1302 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1303 info->nz_used = irecv[0]; 1304 info->nz_allocated = irecv[1]; 1305 info->nz_unneeded = irecv[2]; 1306 info->memory = irecv[3]; 1307 info->mallocs = irecv[4]; 1308 } else { 1309 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1310 } 1311 info->rows_global = (PetscReal)A->M; 1312 info->columns_global = (PetscReal)A->N; 1313 info->rows_local = (PetscReal)A->m; 1314 info->columns_local = (PetscReal)A->N; 1315 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1316 info->fill_ratio_needed = 0; 1317 info->factor_mallocs = 0; 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1323 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1324 { 1325 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1326 int ierr; 1327 1328 PetscFunctionBegin; 1329 switch (op) { 1330 case MAT_NO_NEW_NONZERO_LOCATIONS: 1331 case MAT_YES_NEW_NONZERO_LOCATIONS: 1332 case MAT_COLUMNS_UNSORTED: 1333 case MAT_COLUMNS_SORTED: 1334 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1335 case MAT_KEEP_ZEROED_ROWS: 1336 case MAT_NEW_NONZERO_LOCATION_ERR: 1337 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1338 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1339 break; 1340 case MAT_ROW_ORIENTED: 1341 a->roworiented = PETSC_TRUE; 1342 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1343 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1344 break; 1345 case MAT_ROWS_SORTED: 1346 case MAT_ROWS_UNSORTED: 1347 case MAT_YES_NEW_DIAGONALS: 1348 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1349 break; 1350 case MAT_COLUMN_ORIENTED: 1351 a->roworiented = PETSC_FALSE; 1352 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1353 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1354 break; 1355 case MAT_IGNORE_OFF_PROC_ENTRIES: 1356 a->donotstash = PETSC_TRUE; 1357 break; 1358 case MAT_NO_NEW_DIAGONALS: 1359 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1360 case MAT_USE_HASH_TABLE: 1361 a->ht_flag = PETSC_TRUE; 1362 break; 1363 default: 1364 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1371 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1372 { 1373 int ierr; 1374 PetscFunctionBegin; 1375 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1381 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1382 { 1383 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1384 Mat a = baij->A,b = baij->B; 1385 int ierr,s1,s2,s3; 1386 1387 PetscFunctionBegin; 1388 if (ll != rr) { 1389 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1390 } 1391 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1392 if (rr) { 1393 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1394 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1395 /* Overlap communication with computation. */ 1396 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1397 /*} if (ll) { */ 1398 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1399 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1400 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1401 /* } */ 1402 /* scale the diagonal block */ 1403 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1404 1405 /* if (rr) { */ 1406 /* Do a scatter end and then right scale the off-diagonal block */ 1407 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1408 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1409 } 1410 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1416 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1417 { 1418 PetscFunctionBegin; 1419 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1424 int MatPrintHelp_MPISBAIJ(Mat A) 1425 { 1426 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1427 MPI_Comm comm = A->comm; 1428 static int called = 0; 1429 int ierr; 1430 1431 PetscFunctionBegin; 1432 if (!a->rank) { 1433 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1434 } 1435 if (called) {PetscFunctionReturn(0);} else called = 1; 1436 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1437 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1443 int MatSetUnfactored_MPISBAIJ(Mat A) 1444 { 1445 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1446 int ierr; 1447 1448 PetscFunctionBegin; 1449 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1450 PetscFunctionReturn(0); 1451 } 1452 1453 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatEqual_MPISBAIJ" 1457 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1458 { 1459 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1460 Mat a,b,c,d; 1461 PetscTruth flg; 1462 int ierr; 1463 1464 PetscFunctionBegin; 1465 a = matA->A; b = matA->B; 1466 c = matB->A; d = matB->B; 1467 1468 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1469 if (flg == PETSC_TRUE) { 1470 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1471 } 1472 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1478 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1479 { 1480 int ierr; 1481 1482 PetscFunctionBegin; 1483 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 /* -------------------------------------------------------------------*/ 1487 static struct _MatOps MatOps_Values = { 1488 MatSetValues_MPISBAIJ, 1489 MatGetRow_MPISBAIJ, 1490 MatRestoreRow_MPISBAIJ, 1491 MatMult_MPISBAIJ, 1492 MatMultAdd_MPISBAIJ, 1493 MatMultTranspose_MPISBAIJ, 1494 MatMultTransposeAdd_MPISBAIJ, 1495 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 0, 1501 MatRelax_MPISBAIJ, 1502 MatTranspose_MPISBAIJ, 1503 MatGetInfo_MPISBAIJ, 1504 MatEqual_MPISBAIJ, 1505 MatGetDiagonal_MPISBAIJ, 1506 MatDiagonalScale_MPISBAIJ, 1507 MatNorm_MPISBAIJ, 1508 MatAssemblyBegin_MPISBAIJ, 1509 MatAssemblyEnd_MPISBAIJ, 1510 0, 1511 MatSetOption_MPISBAIJ, 1512 MatZeroEntries_MPISBAIJ, 1513 MatZeroRows_MPISBAIJ, 1514 0, 1515 0, 1516 0, 1517 0, 1518 MatSetUpPreallocation_MPISBAIJ, 1519 0, 1520 0, 1521 0, 1522 0, 1523 MatDuplicate_MPISBAIJ, 1524 0, 1525 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 MatGetValues_MPISBAIJ, 1532 0, 1533 MatPrintHelp_MPISBAIJ, 1534 MatScale_MPISBAIJ, 1535 0, 1536 0, 1537 0, 1538 MatGetBlockSize_MPISBAIJ, 1539 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 MatSetUnfactored_MPISBAIJ, 1546 0, 1547 MatSetValuesBlocked_MPISBAIJ, 1548 0, 1549 0, 1550 0, 1551 MatGetPetscMaps_Petsc, 1552 0, 1553 0, 1554 0, 1555 0, 1556 0, 1557 0, 1558 MatGetRowMax_MPISBAIJ}; 1559 1560 1561 EXTERN_C_BEGIN 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1564 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1565 { 1566 PetscFunctionBegin; 1567 *a = ((Mat_MPISBAIJ *)A->data)->A; 1568 *iscopy = PETSC_FALSE; 1569 PetscFunctionReturn(0); 1570 } 1571 EXTERN_C_END 1572 1573 EXTERN_C_BEGIN 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1576 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1577 { 1578 Mat_MPISBAIJ *b; 1579 int ierr,i,mbs,Mbs; 1580 1581 PetscFunctionBegin; 1582 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1583 1584 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1585 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1586 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1587 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1588 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1589 if (d_nnz) { 1590 for (i=0; i<B->m/bs; i++) { 1591 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1592 } 1593 } 1594 if (o_nnz) { 1595 for (i=0; i<B->m/bs; i++) { 1596 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1597 } 1598 } 1599 B->preallocated = PETSC_TRUE; 1600 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1601 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1602 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1603 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1604 1605 b = (Mat_MPISBAIJ*)B->data; 1606 mbs = B->m/bs; 1607 Mbs = B->M/bs; 1608 if (mbs*bs != B->m) { 1609 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1610 } 1611 1612 b->bs = bs; 1613 b->bs2 = bs*bs; 1614 b->mbs = mbs; 1615 b->nbs = mbs; 1616 b->Mbs = Mbs; 1617 b->Nbs = Mbs; 1618 1619 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1620 b->rowners[0] = 0; 1621 for (i=2; i<=b->size; i++) { 1622 b->rowners[i] += b->rowners[i-1]; 1623 } 1624 b->rstart = b->rowners[b->rank]; 1625 b->rend = b->rowners[b->rank+1]; 1626 b->cstart = b->rstart; 1627 b->cend = b->rend; 1628 for (i=0; i<=b->size; i++) { 1629 b->rowners_bs[i] = b->rowners[i]*bs; 1630 } 1631 b->rstart_bs = b-> rstart*bs; 1632 b->rend_bs = b->rend*bs; 1633 1634 b->cstart_bs = b->cstart*bs; 1635 b->cend_bs = b->cend*bs; 1636 1637 1638 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1639 PetscLogObjectParent(B,b->A); 1640 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1641 PetscLogObjectParent(B,b->B); 1642 1643 /* build cache for off array entries formed */ 1644 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1645 1646 PetscFunctionReturn(0); 1647 } 1648 EXTERN_C_END 1649 1650 EXTERN_C_BEGIN 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatCreate_MPISBAIJ" 1653 int MatCreate_MPISBAIJ(Mat B) 1654 { 1655 Mat_MPISBAIJ *b; 1656 int ierr; 1657 PetscTruth flg; 1658 1659 PetscFunctionBegin; 1660 1661 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1662 B->data = (void*)b; 1663 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1664 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1665 1666 B->ops->destroy = MatDestroy_MPISBAIJ; 1667 B->ops->view = MatView_MPISBAIJ; 1668 B->mapping = 0; 1669 B->factor = 0; 1670 B->assembled = PETSC_FALSE; 1671 1672 B->insertmode = NOT_SET_VALUES; 1673 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1674 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1675 1676 /* build local table of row and column ownerships */ 1677 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1678 b->cowners = b->rowners + b->size + 2; 1679 b->rowners_bs = b->cowners + b->size + 2; 1680 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1681 1682 /* build cache for off array entries formed */ 1683 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1684 b->donotstash = PETSC_FALSE; 1685 b->colmap = PETSC_NULL; 1686 b->garray = PETSC_NULL; 1687 b->roworiented = PETSC_TRUE; 1688 1689 #if defined(PETSC_USE_MAT_SINGLE) 1690 /* stuff for MatSetValues_XXX in single precision */ 1691 b->setvalueslen = 0; 1692 b->setvaluescopy = PETSC_NULL; 1693 #endif 1694 1695 /* stuff used in block assembly */ 1696 b->barray = 0; 1697 1698 /* stuff used for matrix vector multiply */ 1699 b->lvec = 0; 1700 b->Mvctx = 0; 1701 b->slvec0 = 0; 1702 b->slvec0b = 0; 1703 b->slvec1 = 0; 1704 b->slvec1a = 0; 1705 b->slvec1b = 0; 1706 b->sMvctx = 0; 1707 1708 /* stuff for MatGetRow() */ 1709 b->rowindices = 0; 1710 b->rowvalues = 0; 1711 b->getrowactive = PETSC_FALSE; 1712 1713 /* hash table stuff */ 1714 b->ht = 0; 1715 b->hd = 0; 1716 b->ht_size = 0; 1717 b->ht_flag = PETSC_FALSE; 1718 b->ht_fact = 0; 1719 b->ht_total_ct = 0; 1720 b->ht_insert_ct = 0; 1721 1722 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1723 if (flg) { 1724 PetscReal fact = 1.39; 1725 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1726 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1727 if (fact <= 1.0) fact = 1.39; 1728 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1729 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1730 } 1731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1732 "MatStoreValues_MPISBAIJ", 1733 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1734 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1735 "MatRetrieveValues_MPISBAIJ", 1736 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1737 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1738 "MatGetDiagonalBlock_MPISBAIJ", 1739 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1740 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1741 "MatMPISBAIJSetPreallocation_MPISBAIJ", 1742 MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 EXTERN_C_END 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1749 /*@C 1750 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1751 the user should preallocate the matrix storage by setting the parameters 1752 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1753 performance can be increased by more than a factor of 50. 1754 1755 Collective on Mat 1756 1757 Input Parameters: 1758 + A - the matrix 1759 . bs - size of blockk 1760 . d_nz - number of block nonzeros per block row in diagonal portion of local 1761 submatrix (same for all local rows) 1762 . d_nnz - array containing the number of block nonzeros in the various block rows 1763 in the upper triangular and diagonal part of the in diagonal portion of the local 1764 (possibly different for each block row) or PETSC_NULL. You must leave room 1765 for the diagonal entry even if it is zero. 1766 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1767 submatrix (same for all local rows). 1768 - o_nnz - array containing the number of nonzeros in the various block rows of the 1769 off-diagonal portion of the local submatrix (possibly different for 1770 each block row) or PETSC_NULL. 1771 1772 1773 Options Database Keys: 1774 . -mat_no_unroll - uses code that does not unroll the loops in the 1775 block calculations (much slower) 1776 . -mat_block_size - size of the blocks to use 1777 1778 Notes: 1779 1780 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1781 than it must be used on all processors that share the object for that argument. 1782 1783 Storage Information: 1784 For a square global matrix we define each processor's diagonal portion 1785 to be its local rows and the corresponding columns (a square submatrix); 1786 each processor's off-diagonal portion encompasses the remainder of the 1787 local matrix (a rectangular submatrix). 1788 1789 The user can specify preallocated storage for the diagonal part of 1790 the local submatrix with either d_nz or d_nnz (not both). Set 1791 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1792 memory allocation. Likewise, specify preallocated storage for the 1793 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1794 1795 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1796 the figure below we depict these three local rows and all columns (0-11). 1797 1798 .vb 1799 0 1 2 3 4 5 6 7 8 9 10 11 1800 ------------------- 1801 row 3 | o o o d d d o o o o o o 1802 row 4 | o o o d d d o o o o o o 1803 row 5 | o o o d d d o o o o o o 1804 ------------------- 1805 .ve 1806 1807 Thus, any entries in the d locations are stored in the d (diagonal) 1808 submatrix, and any entries in the o locations are stored in the 1809 o (off-diagonal) submatrix. Note that the d matrix is stored in 1810 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1811 1812 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1813 plus the diagonal part of the d matrix, 1814 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1815 In general, for PDE problems in which most nonzeros are near the diagonal, 1816 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1817 or you will get TERRIBLE performance; see the users' manual chapter on 1818 matrices. 1819 1820 Level: intermediate 1821 1822 .keywords: matrix, block, aij, compressed row, sparse, parallel 1823 1824 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1825 @*/ 1826 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1827 { 1828 int ierr,(*f)(Mat,int,int,int*,int,int*); 1829 1830 PetscFunctionBegin; 1831 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1832 if (f) { 1833 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatCreateMPISBAIJ" 1840 /*@C 1841 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1842 (block compressed row). For good matrix assembly performance 1843 the user should preallocate the matrix storage by setting the parameters 1844 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1845 performance can be increased by more than a factor of 50. 1846 1847 Collective on MPI_Comm 1848 1849 Input Parameters: 1850 + comm - MPI communicator 1851 . bs - size of blockk 1852 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1853 This value should be the same as the local size used in creating the 1854 y vector for the matrix-vector product y = Ax. 1855 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1856 This value should be the same as the local size used in creating the 1857 x vector for the matrix-vector product y = Ax. 1858 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1859 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1860 . d_nz - number of block nonzeros per block row in diagonal portion of local 1861 submatrix (same for all local rows) 1862 . d_nnz - array containing the number of block nonzeros in the various block rows 1863 in the upper triangular portion of the in diagonal portion of the local 1864 (possibly different for each block block row) or PETSC_NULL. 1865 You must leave room for the diagonal entry even if it is zero. 1866 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1867 submatrix (same for all local rows). 1868 - o_nnz - array containing the number of nonzeros in the various block rows of the 1869 off-diagonal portion of the local submatrix (possibly different for 1870 each block row) or PETSC_NULL. 1871 1872 Output Parameter: 1873 . A - the matrix 1874 1875 Options Database Keys: 1876 . -mat_no_unroll - uses code that does not unroll the loops in the 1877 block calculations (much slower) 1878 . -mat_block_size - size of the blocks to use 1879 . -mat_mpi - use the parallel matrix data structures even on one processor 1880 (defaults to using SeqBAIJ format on one processor) 1881 1882 Notes: 1883 The user MUST specify either the local or global matrix dimensions 1884 (possibly both). 1885 1886 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1887 than it must be used on all processors that share the object for that argument. 1888 1889 Storage Information: 1890 For a square global matrix we define each processor's diagonal portion 1891 to be its local rows and the corresponding columns (a square submatrix); 1892 each processor's off-diagonal portion encompasses the remainder of the 1893 local matrix (a rectangular submatrix). 1894 1895 The user can specify preallocated storage for the diagonal part of 1896 the local submatrix with either d_nz or d_nnz (not both). Set 1897 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1898 memory allocation. Likewise, specify preallocated storage for the 1899 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1900 1901 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1902 the figure below we depict these three local rows and all columns (0-11). 1903 1904 .vb 1905 0 1 2 3 4 5 6 7 8 9 10 11 1906 ------------------- 1907 row 3 | o o o d d d o o o o o o 1908 row 4 | o o o d d d o o o o o o 1909 row 5 | o o o d d d o o o o o o 1910 ------------------- 1911 .ve 1912 1913 Thus, any entries in the d locations are stored in the d (diagonal) 1914 submatrix, and any entries in the o locations are stored in the 1915 o (off-diagonal) submatrix. Note that the d matrix is stored in 1916 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1917 1918 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1919 plus the diagonal part of the d matrix, 1920 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1921 In general, for PDE problems in which most nonzeros are near the diagonal, 1922 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1923 or you will get TERRIBLE performance; see the users' manual chapter on 1924 matrices. 1925 1926 Level: intermediate 1927 1928 .keywords: matrix, block, aij, compressed row, sparse, parallel 1929 1930 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1931 @*/ 1932 1933 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1934 { 1935 int ierr,size; 1936 1937 PetscFunctionBegin; 1938 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1939 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1940 if (size > 1) { 1941 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1942 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1943 } else { 1944 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1945 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1953 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1954 { 1955 Mat mat; 1956 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1957 int ierr,len=0; 1958 1959 PetscFunctionBegin; 1960 *newmat = 0; 1961 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1962 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1963 mat->preallocated = PETSC_TRUE; 1964 a = (Mat_MPISBAIJ*)mat->data; 1965 a->bs = oldmat->bs; 1966 a->bs2 = oldmat->bs2; 1967 a->mbs = oldmat->mbs; 1968 a->nbs = oldmat->nbs; 1969 a->Mbs = oldmat->Mbs; 1970 a->Nbs = oldmat->Nbs; 1971 1972 a->rstart = oldmat->rstart; 1973 a->rend = oldmat->rend; 1974 a->cstart = oldmat->cstart; 1975 a->cend = oldmat->cend; 1976 a->size = oldmat->size; 1977 a->rank = oldmat->rank; 1978 a->donotstash = oldmat->donotstash; 1979 a->roworiented = oldmat->roworiented; 1980 a->rowindices = 0; 1981 a->rowvalues = 0; 1982 a->getrowactive = PETSC_FALSE; 1983 a->barray = 0; 1984 a->rstart_bs = oldmat->rstart_bs; 1985 a->rend_bs = oldmat->rend_bs; 1986 a->cstart_bs = oldmat->cstart_bs; 1987 a->cend_bs = oldmat->cend_bs; 1988 1989 /* hash table stuff */ 1990 a->ht = 0; 1991 a->hd = 0; 1992 a->ht_size = 0; 1993 a->ht_flag = oldmat->ht_flag; 1994 a->ht_fact = oldmat->ht_fact; 1995 a->ht_total_ct = 0; 1996 a->ht_insert_ct = 0; 1997 1998 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1999 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2000 a->cowners = a->rowners + a->size + 2; 2001 a->rowners_bs = a->cowners + a->size + 2; 2002 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2003 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2004 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2005 if (oldmat->colmap) { 2006 #if defined (PETSC_USE_CTABLE) 2007 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2008 #else 2009 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2010 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2011 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2012 #endif 2013 } else a->colmap = 0; 2014 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2015 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2016 PetscLogObjectMemory(mat,len*sizeof(int)); 2017 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2018 } else a->garray = 0; 2019 2020 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2021 PetscLogObjectParent(mat,a->lvec); 2022 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2023 2024 PetscLogObjectParent(mat,a->Mvctx); 2025 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2026 PetscLogObjectParent(mat,a->A); 2027 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2028 PetscLogObjectParent(mat,a->B); 2029 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2030 *newmat = mat; 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #include "petscsys.h" 2035 2036 EXTERN_C_BEGIN 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatLoad_MPISBAIJ" 2039 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2040 { 2041 Mat A; 2042 int i,nz,ierr,j,rstart,rend,fd; 2043 PetscScalar *vals,*buf; 2044 MPI_Comm comm = ((PetscObject)viewer)->comm; 2045 MPI_Status status; 2046 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2047 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2048 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2049 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2050 int dcount,kmax,k,nzcount,tmp; 2051 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 2052 PetscTruth flag; 2053 #endif 2054 2055 PetscFunctionBegin; 2056 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2057 2058 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2059 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2060 if (!rank) { 2061 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2062 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2063 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2064 if (header[3] < 0) { 2065 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2066 } 2067 } 2068 2069 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2070 M = header[1]; N = header[2]; 2071 2072 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2073 2074 /* 2075 This code adds extra rows to make sure the number of rows is 2076 divisible by the blocksize 2077 */ 2078 Mbs = M/bs; 2079 extra_rows = bs - M + bs*(Mbs); 2080 if (extra_rows == bs) extra_rows = 0; 2081 else Mbs++; 2082 if (extra_rows &&!rank) { 2083 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2084 } 2085 2086 /* determine ownership of all rows */ 2087 mbs = Mbs/size + ((Mbs % size) > rank); 2088 m = mbs*bs; 2089 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2090 browners = rowners + size + 1; 2091 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2092 rowners[0] = 0; 2093 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2094 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2095 rstart = rowners[rank]; 2096 rend = rowners[rank+1]; 2097 2098 /* distribute row lengths to all processors */ 2099 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2100 if (!rank) { 2101 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2102 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2103 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2104 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2105 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2106 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2107 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2108 } else { 2109 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2110 } 2111 2112 if (!rank) { /* procs[0] */ 2113 /* calculate the number of nonzeros on each processor */ 2114 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2115 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2116 for (i=0; i<size; i++) { 2117 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2118 procsnz[i] += rowlengths[j]; 2119 } 2120 } 2121 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2122 2123 /* determine max buffer needed and allocate it */ 2124 maxnz = 0; 2125 for (i=0; i<size; i++) { 2126 maxnz = PetscMax(maxnz,procsnz[i]); 2127 } 2128 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2129 2130 /* read in my part of the matrix column indices */ 2131 nz = procsnz[0]; 2132 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2133 mycols = ibuf; 2134 if (size == 1) nz -= extra_rows; 2135 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2136 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2137 2138 /* read in every ones (except the last) and ship off */ 2139 for (i=1; i<size-1; i++) { 2140 nz = procsnz[i]; 2141 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2142 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2143 } 2144 /* read in the stuff for the last proc */ 2145 if (size != 1) { 2146 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2147 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2148 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2149 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2150 } 2151 ierr = PetscFree(cols);CHKERRQ(ierr); 2152 } else { /* procs[i], i>0 */ 2153 /* determine buffer space needed for message */ 2154 nz = 0; 2155 for (i=0; i<m; i++) { 2156 nz += locrowlens[i]; 2157 } 2158 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2159 mycols = ibuf; 2160 /* receive message of column indices*/ 2161 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2162 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2163 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2164 } 2165 2166 /* loop over local rows, determining number of off diagonal entries */ 2167 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2168 odlens = dlens + (rend-rstart); 2169 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2170 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2171 masked1 = mask + Mbs; 2172 masked2 = masked1 + Mbs; 2173 rowcount = 0; nzcount = 0; 2174 for (i=0; i<mbs; i++) { 2175 dcount = 0; 2176 odcount = 0; 2177 for (j=0; j<bs; j++) { 2178 kmax = locrowlens[rowcount]; 2179 for (k=0; k<kmax; k++) { 2180 tmp = mycols[nzcount++]/bs; /* block col. index */ 2181 if (!mask[tmp]) { 2182 mask[tmp] = 1; 2183 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2184 else masked1[dcount++] = tmp; /* entry in diag portion */ 2185 } 2186 } 2187 rowcount++; 2188 } 2189 2190 dlens[i] = dcount; /* d_nzz[i] */ 2191 odlens[i] = odcount; /* o_nzz[i] */ 2192 2193 /* zero out the mask elements we set */ 2194 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2195 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2196 } 2197 2198 /* create our matrix */ 2199 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2200 CHKERRQ(ierr); 2201 A = *newmat; 2202 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2203 2204 if (!rank) { 2205 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2206 /* read in my part of the matrix numerical values */ 2207 nz = procsnz[0]; 2208 vals = buf; 2209 mycols = ibuf; 2210 if (size == 1) nz -= extra_rows; 2211 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2212 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2213 2214 /* insert into matrix */ 2215 jj = rstart*bs; 2216 for (i=0; i<m; i++) { 2217 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2218 mycols += locrowlens[i]; 2219 vals += locrowlens[i]; 2220 jj++; 2221 } 2222 2223 /* read in other processors (except the last one) and ship out */ 2224 for (i=1; i<size-1; i++) { 2225 nz = procsnz[i]; 2226 vals = buf; 2227 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2228 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2229 } 2230 /* the last proc */ 2231 if (size != 1){ 2232 nz = procsnz[i] - extra_rows; 2233 vals = buf; 2234 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2235 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2236 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2237 } 2238 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2239 2240 } else { 2241 /* receive numeric values */ 2242 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2243 2244 /* receive message of values*/ 2245 vals = buf; 2246 mycols = ibuf; 2247 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2248 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2249 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2250 2251 /* insert into matrix */ 2252 jj = rstart*bs; 2253 for (i=0; i<m; i++) { 2254 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2255 mycols += locrowlens[i]; 2256 vals += locrowlens[i]; 2257 jj++; 2258 } 2259 } 2260 2261 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2262 ierr = PetscFree(buf);CHKERRQ(ierr); 2263 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2264 ierr = PetscFree(rowners);CHKERRQ(ierr); 2265 ierr = PetscFree(dlens);CHKERRQ(ierr); 2266 ierr = PetscFree(mask);CHKERRQ(ierr); 2267 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2268 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2269 #if defined(PETSC_HAVE_SPOOLES) 2270 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 2271 if (flag) { 2272 if (size == 1) { 2273 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2274 } else { 2275 ierr = MatUseSpooles_MPISBAIJ(A);CHKERRQ(ierr); 2276 } 2277 } 2278 #endif 2279 #if defined(PETSC_HAVE_MUMPS) 2280 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 2281 if (flag) { 2282 ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); 2283 } 2284 #endif 2285 PetscFunctionReturn(0); 2286 } 2287 EXTERN_C_END 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2291 /*@ 2292 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2293 2294 Input Parameters: 2295 . mat - the matrix 2296 . fact - factor 2297 2298 Collective on Mat 2299 2300 Level: advanced 2301 2302 Notes: 2303 This can also be set by the command line option: -mat_use_hash_table fact 2304 2305 .keywords: matrix, hashtable, factor, HT 2306 2307 .seealso: MatSetOption() 2308 @*/ 2309 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2310 { 2311 PetscFunctionBegin; 2312 SETERRQ(1,"Function not yet written for SBAIJ format"); 2313 /* PetscFunctionReturn(0); */ 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2318 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2319 { 2320 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2321 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2322 PetscReal atmp; 2323 PetscReal *work,*svalues,*rvalues; 2324 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2325 int rank,size,*rowners_bs,dest,count,source; 2326 PetscScalar *va; 2327 MatScalar *ba; 2328 MPI_Status stat; 2329 2330 PetscFunctionBegin; 2331 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2332 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2333 2334 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2335 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2336 2337 bs = a->bs; 2338 mbs = a->mbs; 2339 Mbs = a->Mbs; 2340 ba = b->a; 2341 bi = b->i; 2342 bj = b->j; 2343 /* 2344 PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2345 PetscSynchronizedFlush(A->comm); 2346 */ 2347 2348 /* find ownerships */ 2349 rowners_bs = a->rowners_bs; 2350 /* 2351 if (!rank){ 2352 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2353 } 2354 */ 2355 2356 /* each proc creates an array to be distributed */ 2357 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2358 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2359 2360 /* row_max for B */ 2361 if (rank != size-1){ 2362 for (i=0; i<mbs; i++) { 2363 ncols = bi[1] - bi[0]; bi++; 2364 brow = bs*i; 2365 for (j=0; j<ncols; j++){ 2366 bcol = bs*(*bj); 2367 for (kcol=0; kcol<bs; kcol++){ 2368 col = bcol + kcol; /* local col index */ 2369 col += rowners_bs[rank+1]; /* global col index */ 2370 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2371 for (krow=0; krow<bs; krow++){ 2372 atmp = PetscAbsScalar(*ba); ba++; 2373 row = brow + krow; /* local row index */ 2374 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2375 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2376 if (work[col] < atmp) work[col] = atmp; 2377 } 2378 } 2379 bj++; 2380 } 2381 } 2382 /* 2383 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2384 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2385 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2386 */ 2387 2388 /* send values to its owners */ 2389 for (dest=rank+1; dest<size; dest++){ 2390 svalues = work + rowners_bs[dest]; 2391 count = rowners_bs[dest+1]-rowners_bs[dest]; 2392 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr); 2393 /* 2394 PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2395 PetscSynchronizedFlush(A->comm); 2396 */ 2397 } 2398 } 2399 2400 /* receive values */ 2401 if (rank){ 2402 rvalues = work; 2403 count = rowners_bs[rank+1]-rowners_bs[rank]; 2404 for (source=0; source<rank; source++){ 2405 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr); 2406 /* process values */ 2407 for (i=0; i<count; i++){ 2408 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2409 } 2410 /* 2411 PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2412 PetscSynchronizedFlush(A->comm); 2413 */ 2414 } 2415 } 2416 2417 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2418 ierr = PetscFree(work);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 #undef __FUNCT__ 2423 #define __FUNCT__ "MatRelax_MPISBAIJ" 2424 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2425 { 2426 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2427 int ierr,mbs=mat->mbs,bs=mat->bs; 2428 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2429 Vec bb1; 2430 2431 PetscFunctionBegin; 2432 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2433 if (bs > 1) 2434 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2435 2436 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2437 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2438 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2439 its--; 2440 } 2441 2442 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2443 while (its--){ 2444 2445 /* lower triangular part: slvec0b = - B^T*xx */ 2446 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2447 2448 /* copy xx into slvec0a */ 2449 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2450 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2451 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2452 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2453 2454 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2455 2456 /* copy bb into slvec1a */ 2457 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2458 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2459 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2460 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2461 2462 /* set slvec1b = 0 */ 2463 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2464 2465 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2466 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2468 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2469 2470 /* upper triangular part: bb1 = bb1 - B*x */ 2471 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2472 2473 /* local diagonal sweep */ 2474 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2475 } 2476 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2477 } else { 2478 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2479 } 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2485 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2486 { 2487 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2488 int ierr; 2489 PetscScalar mone=-1.0; 2490 Vec lvec1,bb1; 2491 2492 PetscFunctionBegin; 2493 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2494 if (mat->bs > 1) 2495 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2496 2497 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2498 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2499 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2500 its--; 2501 } 2502 2503 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2504 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2505 while (its--){ 2506 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2507 2508 /* lower diagonal part: bb1 = bb - B^T*xx */ 2509 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2510 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2511 2512 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2513 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2514 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2515 2516 /* upper diagonal part: bb1 = bb1 - B*x */ 2517 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2518 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2519 2520 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2521 2522 /* diagonal sweep */ 2523 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2524 } 2525 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2526 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2527 } else { 2528 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2529 } 2530 PetscFunctionReturn(0); 2531 } 2532 2533