1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 12 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 16 extern int MatPrintHelp_SeqSBAIJ(Mat); 17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 24 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 25 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 26 converts the entries into single precision and then calls ..._MatScalar() to put them 27 into the single precision data structures. 28 */ 29 #if defined(PETSC_USE_MAT_SINGLE) 30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 #else 36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 37 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 39 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 41 #endif 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 46 int MatStoreValues_MPISBAIJ(Mat mat) 47 { 48 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 49 int ierr; 50 51 PetscFunctionBegin; 52 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 53 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 61 int MatRetrieveValues_MPISBAIJ(Mat mat) 62 { 63 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 64 int ierr; 65 66 PetscFunctionBegin; 67 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 68 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* 74 Local utility routine that creates a mapping from the global column 75 number to the local number in the off-diagonal part of the local 76 storage of the matrix. This is done in a non scable way since the 77 length of colmap equals the global matrix length. 78 */ 79 #undef __FUNCT__ 80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 } 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 271 { 272 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 280 b->setvalueslen = N; 281 } 282 vsingle = b->setvaluescopy; 283 for (i=0; i<N; i++) { 284 vsingle[i] = v[i]; 285 } 286 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 306 { 307 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 308 int ierr,i,N = m*n*b->bs2; 309 MatScalar *vsingle; 310 311 PetscFunctionBegin; 312 SETERRQ(1,"Function not yet written for SBAIJ format"); 313 /* PetscFunctionReturn(0); */ 314 } 315 #endif 316 317 /* Only add/insert a(i,j) with i<=j (blocks). 318 Any a(i,j) with i>j input by user is ingored. 319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSetValues_MPIBAIJ" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 355 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1; 386 #else 387 col = baij->colmap[in[j]/bs] - 1; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } else col += in[j]%bs; 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 431 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 432 { 433 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 434 MatScalar *value,*barray=baij->barray; 435 PetscTruth roworiented = baij->roworiented; 436 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 437 int rend=baij->rend,cstart=baij->cstart,stepval; 438 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 439 440 PetscFunctionBegin; 441 if(!barray) { 442 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 443 baij->barray = barray; 444 } 445 446 if (roworiented) { 447 stepval = (n-1)*bs; 448 } else { 449 stepval = (m-1)*bs; 450 } 451 for (i=0; i<m; i++) { 452 if (im[i] < 0) continue; 453 #if defined(PETSC_USE_BOPT_g) 454 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 455 #endif 456 if (im[i] >= rstart && im[i] < rend) { 457 row = im[i] - rstart; 458 for (j=0; j<n; j++) { 459 /* If NumCol = 1 then a copy is not required */ 460 if ((roworiented) && (n == 1)) { 461 barray = v + i*bs2; 462 } else if((!roworiented) && (m == 1)) { 463 barray = v + j*bs2; 464 } else { /* Here a copy is required */ 465 if (roworiented) { 466 value = v + i*(stepval+bs)*bs + j*bs; 467 } else { 468 value = v + j*(stepval+bs)*bs + i*bs; 469 } 470 for (ii=0; ii<bs; ii++,value+=stepval) { 471 for (jj=0; jj<bs; jj++) { 472 *barray++ = *value++; 473 } 474 } 475 barray -=bs2; 476 } 477 478 if (in[j] >= cstart && in[j] < cend){ 479 col = in[j] - cstart; 480 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 481 } 482 else if (in[j] < 0) continue; 483 #if defined(PETSC_USE_BOPT_g) 484 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 485 #endif 486 else { 487 if (mat->was_assembled) { 488 if (!baij->colmap) { 489 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 490 } 491 492 #if defined(PETSC_USE_BOPT_g) 493 #if defined (PETSC_USE_CTABLE) 494 { int data; 495 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 496 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 497 } 498 #else 499 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 500 #endif 501 #endif 502 #if defined (PETSC_USE_CTABLE) 503 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 504 col = (col - 1)/bs; 505 #else 506 col = (baij->colmap[in[j]] - 1)/bs; 507 #endif 508 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 509 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 510 col = in[j]; 511 } 512 } 513 else col = in[j]; 514 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 515 } 516 } 517 } else { 518 if (!baij->donotstash) { 519 if (roworiented) { 520 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 521 } else { 522 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 523 } 524 } 525 } 526 } 527 PetscFunctionReturn(0); 528 } 529 530 #define HASH_KEY 0.6180339887 531 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 532 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 533 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 536 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 537 { 538 PetscFunctionBegin; 539 SETERRQ(1,"Function not yet written for SBAIJ format"); 540 /* PetscFunctionReturn(0); */ 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 545 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 546 { 547 PetscFunctionBegin; 548 SETERRQ(1,"Function not yet written for SBAIJ format"); 549 /* PetscFunctionReturn(0); */ 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatGetValues_MPISBAIJ" 554 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 555 { 556 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 557 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 558 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 559 560 PetscFunctionBegin; 561 for (i=0; i<m; i++) { 562 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 563 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 564 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 565 row = idxm[i] - bsrstart; 566 for (j=0; j<n; j++) { 567 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 568 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 569 if (idxn[j] >= bscstart && idxn[j] < bscend){ 570 col = idxn[j] - bscstart; 571 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 572 } else { 573 if (!baij->colmap) { 574 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 575 } 576 #if defined (PETSC_USE_CTABLE) 577 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 578 data --; 579 #else 580 data = baij->colmap[idxn[j]/bs]-1; 581 #endif 582 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 583 else { 584 col = data + idxn[j]%bs; 585 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 586 } 587 } 588 } 589 } else { 590 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatNorm_MPISBAIJ" 598 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 599 { 600 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 601 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 602 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 603 int ierr; 604 PetscReal sum[2],*lnorm2; 605 606 PetscFunctionBegin; 607 if (baij->size == 1) { 608 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 609 } else { 610 if (type == NORM_FROBENIUS) { 611 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 612 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 613 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 614 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 615 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 616 /* 617 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 618 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 619 */ 620 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 621 /* 622 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 623 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 624 625 *norm = sqrt(sum[0] + 2*sum[1]); 626 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 627 } else { 628 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 /* 635 Creates the hash table, and sets the table 636 This table is created only once. 637 If new entried need to be added to the matrix 638 then the hash table has to be destroyed and 639 recreated. 640 */ 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 643 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 644 { 645 PetscFunctionBegin; 646 SETERRQ(1,"Function not yet written for SBAIJ format"); 647 /* PetscFunctionReturn(0); */ 648 } 649 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 652 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 655 int ierr,nstash,reallocs; 656 InsertMode addv; 657 658 PetscFunctionBegin; 659 if (baij->donotstash) { 660 PetscFunctionReturn(0); 661 } 662 663 /* make sure all processors are either in INSERTMODE or ADDMODE */ 664 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 665 if (addv == (ADD_VALUES|INSERT_VALUES)) { 666 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 667 } 668 mat->insertmode = addv; /* in case this processor had no cache */ 669 670 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 671 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 672 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 673 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 674 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 675 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 681 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 684 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 685 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 686 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 687 int *row,*col,other_disassembled; 688 PetscTruth r1,r2,r3; 689 MatScalar *val; 690 InsertMode addv = mat->insertmode; 691 #if defined(PETSC_HAVE_SPOOLES) 692 PetscTruth flag; 693 #endif 694 695 PetscFunctionBegin; 696 697 if (!baij->donotstash) { 698 while (1) { 699 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 700 /* 701 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 702 PetscSynchronizedFlush(PETSC_COMM_WORLD); 703 */ 704 if (!flg) break; 705 706 for (i=0; i<n;) { 707 /* Now identify the consecutive vals belonging to the same row */ 708 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 713 i = j; 714 } 715 } 716 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 717 /* Now process the block-stash. Since the values are stashed column-oriented, 718 set the roworiented flag to column oriented, and after MatSetValues() 719 restore the original flags */ 720 r1 = baij->roworiented; 721 r2 = a->roworiented; 722 r3 = b->roworiented; 723 baij->roworiented = PETSC_FALSE; 724 a->roworiented = PETSC_FALSE; 725 b->roworiented = PETSC_FALSE; 726 while (1) { 727 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 728 if (!flg) break; 729 730 for (i=0; i<n;) { 731 /* Now identify the consecutive vals belonging to the same row */ 732 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 733 if (j < n) ncols = j-i; 734 else ncols = n-i; 735 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 736 i = j; 737 } 738 } 739 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 740 baij->roworiented = r1; 741 a->roworiented = r2; 742 b->roworiented = r3; 743 } 744 745 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 746 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 747 748 /* determine if any processor has disassembled, if so we must 749 also disassemble ourselfs, in order that we may reassemble. */ 750 /* 751 if nonzero structure of submatrix B cannot change then we know that 752 no processor disassembled thus we can skip this stuff 753 */ 754 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 755 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 756 if (mat->was_assembled && !other_disassembled) { 757 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 758 } 759 } 760 761 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 762 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 763 } 764 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 765 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 766 767 #if defined(PETSC_USE_BOPT_g) 768 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 769 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 770 baij->ht_total_ct = 0; 771 baij->ht_insert_ct = 0; 772 } 773 #endif 774 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 775 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 776 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 777 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 778 } 779 780 if (baij->rowvalues) { 781 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 782 baij->rowvalues = 0; 783 } 784 785 #if defined(PETSC_HAVE_SPOOLES) 786 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 787 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 788 #endif 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 794 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 795 { 796 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 797 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 798 PetscTruth isascii,isdraw; 799 PetscViewer sviewer; 800 PetscViewerFormat format; 801 802 PetscFunctionBegin; 803 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 804 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 805 if (isascii) { 806 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 807 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 808 MatInfo info; 809 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 810 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 811 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 812 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 813 baij->bs,(int)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } else if (format == PETSC_VIEWER_ASCII_INFO) { 822 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 } 826 827 if (isdraw) { 828 PetscDraw draw; 829 PetscTruth isnull; 830 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 831 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 832 } 833 834 if (size == 1) { 835 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 836 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 837 } else { 838 /* assemble the entire matrix onto first processor. */ 839 Mat A; 840 Mat_SeqSBAIJ *Aloc; 841 Mat_SeqBAIJ *Bloc; 842 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 843 MatScalar *a; 844 845 if (!rank) { 846 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 847 } else { 848 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 849 } 850 PetscLogObjectParent(mat,A); 851 852 /* copy over the A part */ 853 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 854 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 855 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 856 857 for (i=0; i<mbs; i++) { 858 rvals[0] = bs*(baij->rstart + i); 859 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 860 for (j=ai[i]; j<ai[i+1]; j++) { 861 col = (baij->cstart+aj[j])*bs; 862 for (k=0; k<bs; k++) { 863 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 864 col++; a += bs; 865 } 866 } 867 } 868 /* copy over the B part */ 869 Bloc = (Mat_SeqBAIJ*)baij->B->data; 870 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 871 for (i=0; i<mbs; i++) { 872 rvals[0] = bs*(baij->rstart + i); 873 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 874 for (j=ai[i]; j<ai[i+1]; j++) { 875 col = baij->garray[aj[j]]*bs; 876 for (k=0; k<bs; k++) { 877 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 878 col++; a += bs; 879 } 880 } 881 } 882 ierr = PetscFree(rvals);CHKERRQ(ierr); 883 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 885 /* 886 Everyone has to call to draw the matrix since the graphics waits are 887 synchronized across all processors that share the PetscDraw object 888 */ 889 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 890 if (!rank) { 891 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 892 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 893 } 894 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 895 ierr = MatDestroy(A);CHKERRQ(ierr); 896 } 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatView_MPISBAIJ" 902 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 903 { 904 int ierr; 905 PetscTruth isascii,isdraw,issocket,isbinary; 906 907 PetscFunctionBegin; 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 909 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 911 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 912 if (isascii || isdraw || issocket || isbinary) { 913 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 914 } else { 915 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 916 } 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatDestroy_MPISBAIJ" 922 int MatDestroy_MPISBAIJ(Mat mat) 923 { 924 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 925 int ierr; 926 927 PetscFunctionBegin; 928 #if defined(PETSC_USE_LOG) 929 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 930 #endif 931 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 932 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 933 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 934 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 935 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 936 #if defined (PETSC_USE_CTABLE) 937 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 938 #else 939 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 940 #endif 941 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 942 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 943 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 944 if (baij->slvec0) { 945 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 946 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 947 } 948 if (baij->slvec1) { 949 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 950 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 951 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 952 } 953 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 954 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 955 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 956 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 957 #if defined(PETSC_USE_MAT_SINGLE) 958 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 959 #endif 960 ierr = PetscFree(baij);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatMult_MPISBAIJ" 966 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 967 { 968 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 969 int ierr,nt,mbs=a->mbs,bs=a->bs; 970 PetscScalar *x,*from,zero=0.0; 971 972 PetscFunctionBegin; 973 /* 974 PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n"); 975 PetscSynchronizedFlush(PETSC_COMM_WORLD); 976 */ 977 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 978 if (nt != A->n) { 979 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 980 } 981 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 982 if (nt != A->m) { 983 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 984 } 985 986 /* diagonal part */ 987 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 988 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 989 990 /* subdiagonal part */ 991 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 992 993 /* copy x into the vec slvec0 */ 994 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 995 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 996 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 997 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 998 999 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1000 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1002 1003 /* supperdiagonal part */ 1004 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1005 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1011 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1014 int ierr,nt; 1015 1016 PetscFunctionBegin; 1017 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1018 if (nt != A->n) { 1019 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1020 } 1021 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1022 if (nt != A->m) { 1023 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1024 } 1025 1026 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1027 /* do diagonal part */ 1028 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1029 /* do supperdiagonal part */ 1030 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1031 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1032 /* do subdiagonal part */ 1033 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1034 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1036 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1042 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1043 { 1044 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1045 int ierr,mbs=a->mbs,bs=a->bs; 1046 PetscScalar *x,*from,zero=0.0; 1047 1048 PetscFunctionBegin; 1049 /* 1050 PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n"); 1051 PetscSynchronizedFlush(PETSC_COMM_WORLD); 1052 */ 1053 /* diagonal part */ 1054 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1055 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 1056 1057 /* subdiagonal part */ 1058 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1059 1060 /* copy x into the vec slvec0 */ 1061 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1062 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1063 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1064 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1065 1066 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1067 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 1069 1070 /* supperdiagonal part */ 1071 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1072 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1078 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1079 { 1080 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1081 int ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1085 /* do diagonal part */ 1086 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1087 /* do supperdiagonal part */ 1088 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1089 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1090 1091 /* do subdiagonal part */ 1092 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1093 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1094 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1095 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1101 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 PetscFunctionBegin; 1104 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1105 /* PetscFunctionReturn(0); */ 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1110 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 PetscFunctionBegin; 1113 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1114 /* PetscFunctionReturn(0); */ 1115 } 1116 1117 /* 1118 This only works correctly for square matrices where the subblock A->A is the 1119 diagonal block 1120 */ 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1123 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1124 { 1125 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1126 int ierr; 1127 1128 PetscFunctionBegin; 1129 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1130 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatScale_MPISBAIJ" 1136 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 1137 { 1138 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1143 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1149 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1150 { 1151 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1152 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1153 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1154 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1155 int *cmap,*idx_p,cstart = mat->cstart; 1156 1157 PetscFunctionBegin; 1158 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1159 mat->getrowactive = PETSC_TRUE; 1160 1161 if (!mat->rowvalues && (idx || v)) { 1162 /* 1163 allocate enough space to hold information from the longest row. 1164 */ 1165 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1166 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1167 int max = 1,mbs = mat->mbs,tmp; 1168 for (i=0; i<mbs; i++) { 1169 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1170 if (max < tmp) { max = tmp; } 1171 } 1172 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1173 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1174 } 1175 1176 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1177 lrow = row - brstart; /* local row index */ 1178 1179 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1180 if (!v) {pvA = 0; pvB = 0;} 1181 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1182 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1183 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1184 nztot = nzA + nzB; 1185 1186 cmap = mat->garray; 1187 if (v || idx) { 1188 if (nztot) { 1189 /* Sort by increasing column numbers, assuming A and B already sorted */ 1190 int imark = -1; 1191 if (v) { 1192 *v = v_p = mat->rowvalues; 1193 for (i=0; i<nzB; i++) { 1194 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1195 else break; 1196 } 1197 imark = i; 1198 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1199 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1200 } 1201 if (idx) { 1202 *idx = idx_p = mat->rowindices; 1203 if (imark > -1) { 1204 for (i=0; i<imark; i++) { 1205 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1206 } 1207 } else { 1208 for (i=0; i<nzB; i++) { 1209 if (cmap[cworkB[i]/bs] < cstart) 1210 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1211 else break; 1212 } 1213 imark = i; 1214 } 1215 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1216 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1217 } 1218 } else { 1219 if (idx) *idx = 0; 1220 if (v) *v = 0; 1221 } 1222 } 1223 *nz = nztot; 1224 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1225 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1231 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1232 { 1233 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1234 1235 PetscFunctionBegin; 1236 if (baij->getrowactive == PETSC_FALSE) { 1237 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1238 } 1239 baij->getrowactive = PETSC_FALSE; 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1245 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1246 { 1247 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1248 1249 PetscFunctionBegin; 1250 *bs = baij->bs; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1256 int MatZeroEntries_MPISBAIJ(Mat A) 1257 { 1258 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1259 int ierr; 1260 1261 PetscFunctionBegin; 1262 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1263 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1269 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1270 { 1271 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1272 Mat A = a->A,B = a->B; 1273 int ierr; 1274 PetscReal isend[5],irecv[5]; 1275 1276 PetscFunctionBegin; 1277 info->block_size = (PetscReal)a->bs; 1278 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1279 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1280 isend[3] = info->memory; isend[4] = info->mallocs; 1281 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1282 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1283 isend[3] += info->memory; isend[4] += info->mallocs; 1284 if (flag == MAT_LOCAL) { 1285 info->nz_used = isend[0]; 1286 info->nz_allocated = isend[1]; 1287 info->nz_unneeded = isend[2]; 1288 info->memory = isend[3]; 1289 info->mallocs = isend[4]; 1290 } else if (flag == MAT_GLOBAL_MAX) { 1291 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1292 info->nz_used = irecv[0]; 1293 info->nz_allocated = irecv[1]; 1294 info->nz_unneeded = irecv[2]; 1295 info->memory = irecv[3]; 1296 info->mallocs = irecv[4]; 1297 } else if (flag == MAT_GLOBAL_SUM) { 1298 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1299 info->nz_used = irecv[0]; 1300 info->nz_allocated = irecv[1]; 1301 info->nz_unneeded = irecv[2]; 1302 info->memory = irecv[3]; 1303 info->mallocs = irecv[4]; 1304 } else { 1305 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1306 } 1307 info->rows_global = (PetscReal)A->M; 1308 info->columns_global = (PetscReal)A->N; 1309 info->rows_local = (PetscReal)A->m; 1310 info->columns_local = (PetscReal)A->N; 1311 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1312 info->fill_ratio_needed = 0; 1313 info->factor_mallocs = 0; 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1319 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1320 { 1321 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1322 int ierr; 1323 1324 PetscFunctionBegin; 1325 switch (op) { 1326 case MAT_NO_NEW_NONZERO_LOCATIONS: 1327 case MAT_YES_NEW_NONZERO_LOCATIONS: 1328 case MAT_COLUMNS_UNSORTED: 1329 case MAT_COLUMNS_SORTED: 1330 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1331 case MAT_KEEP_ZEROED_ROWS: 1332 case MAT_NEW_NONZERO_LOCATION_ERR: 1333 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1334 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1335 break; 1336 case MAT_ROW_ORIENTED: 1337 a->roworiented = PETSC_TRUE; 1338 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1339 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1340 break; 1341 case MAT_ROWS_SORTED: 1342 case MAT_ROWS_UNSORTED: 1343 case MAT_YES_NEW_DIAGONALS: 1344 case MAT_USE_SINGLE_PRECISION_SOLVES: 1345 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1346 break; 1347 case MAT_COLUMN_ORIENTED: 1348 a->roworiented = PETSC_FALSE; 1349 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1350 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1351 break; 1352 case MAT_IGNORE_OFF_PROC_ENTRIES: 1353 a->donotstash = PETSC_TRUE; 1354 break; 1355 case MAT_NO_NEW_DIAGONALS: 1356 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1357 case MAT_USE_HASH_TABLE: 1358 a->ht_flag = PETSC_TRUE; 1359 break; 1360 default: 1361 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1368 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1369 { 1370 int ierr; 1371 PetscFunctionBegin; 1372 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1378 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1379 { 1380 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1381 Mat a = baij->A,b = baij->B; 1382 int ierr,s1,s2,s3; 1383 1384 PetscFunctionBegin; 1385 if (ll != rr) { 1386 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1387 } 1388 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1389 if (rr) { 1390 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1391 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1392 /* Overlap communication with computation. */ 1393 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1394 /*} if (ll) { */ 1395 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1396 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1397 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1398 /* } */ 1399 /* scale the diagonal block */ 1400 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1401 1402 /* if (rr) { */ 1403 /* Do a scatter end and then right scale the off-diagonal block */ 1404 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1405 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1406 } 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1413 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1414 { 1415 PetscFunctionBegin; 1416 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1421 int MatPrintHelp_MPISBAIJ(Mat A) 1422 { 1423 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1424 MPI_Comm comm = A->comm; 1425 static int called = 0; 1426 int ierr; 1427 1428 PetscFunctionBegin; 1429 if (!a->rank) { 1430 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1431 } 1432 if (called) {PetscFunctionReturn(0);} else called = 1; 1433 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1434 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1440 int MatSetUnfactored_MPISBAIJ(Mat A) 1441 { 1442 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1443 int ierr; 1444 1445 PetscFunctionBegin; 1446 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 1450 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatEqual_MPISBAIJ" 1454 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1455 { 1456 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1457 Mat a,b,c,d; 1458 PetscTruth flg; 1459 int ierr; 1460 1461 PetscFunctionBegin; 1462 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1463 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1464 a = matA->A; b = matA->B; 1465 c = matB->A; d = matB->B; 1466 1467 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1468 if (flg == PETSC_TRUE) { 1469 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1470 } 1471 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1477 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1478 { 1479 int ierr; 1480 1481 PetscFunctionBegin; 1482 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 /* -------------------------------------------------------------------*/ 1486 static struct _MatOps MatOps_Values = { 1487 MatSetValues_MPISBAIJ, 1488 MatGetRow_MPISBAIJ, 1489 MatRestoreRow_MPISBAIJ, 1490 MatMult_MPISBAIJ, 1491 MatMultAdd_MPISBAIJ, 1492 MatMultTranspose_MPISBAIJ, 1493 MatMultTransposeAdd_MPISBAIJ, 1494 0, 1495 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 MatRelax_MPISBAIJ, 1501 MatTranspose_MPISBAIJ, 1502 MatGetInfo_MPISBAIJ, 1503 MatEqual_MPISBAIJ, 1504 MatGetDiagonal_MPISBAIJ, 1505 MatDiagonalScale_MPISBAIJ, 1506 MatNorm_MPISBAIJ, 1507 MatAssemblyBegin_MPISBAIJ, 1508 MatAssemblyEnd_MPISBAIJ, 1509 0, 1510 MatSetOption_MPISBAIJ, 1511 MatZeroEntries_MPISBAIJ, 1512 MatZeroRows_MPISBAIJ, 1513 0, 1514 0, 1515 0, 1516 0, 1517 MatSetUpPreallocation_MPISBAIJ, 1518 0, 1519 0, 1520 0, 1521 0, 1522 MatDuplicate_MPISBAIJ, 1523 0, 1524 0, 1525 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 MatGetValues_MPISBAIJ, 1531 0, 1532 MatPrintHelp_MPISBAIJ, 1533 MatScale_MPISBAIJ, 1534 0, 1535 0, 1536 0, 1537 MatGetBlockSize_MPISBAIJ, 1538 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 MatSetUnfactored_MPISBAIJ, 1545 0, 1546 MatSetValuesBlocked_MPISBAIJ, 1547 0, 1548 0, 1549 0, 1550 MatGetPetscMaps_Petsc, 1551 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 0, 1557 MatGetRowMax_MPISBAIJ}; 1558 1559 1560 EXTERN_C_BEGIN 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1563 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1564 { 1565 PetscFunctionBegin; 1566 *a = ((Mat_MPISBAIJ *)A->data)->A; 1567 *iscopy = PETSC_FALSE; 1568 PetscFunctionReturn(0); 1569 } 1570 EXTERN_C_END 1571 1572 EXTERN_C_BEGIN 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatCreate_MPISBAIJ" 1575 int MatCreate_MPISBAIJ(Mat B) 1576 { 1577 Mat_MPISBAIJ *b; 1578 int ierr; 1579 PetscTruth flg; 1580 1581 PetscFunctionBegin; 1582 1583 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1584 B->data = (void*)b; 1585 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1586 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1587 1588 B->ops->destroy = MatDestroy_MPISBAIJ; 1589 B->ops->view = MatView_MPISBAIJ; 1590 B->mapping = 0; 1591 B->factor = 0; 1592 B->assembled = PETSC_FALSE; 1593 1594 B->insertmode = NOT_SET_VALUES; 1595 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1596 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1597 1598 /* build local table of row and column ownerships */ 1599 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1600 b->cowners = b->rowners + b->size + 2; 1601 b->rowners_bs = b->cowners + b->size + 2; 1602 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1603 1604 /* build cache for off array entries formed */ 1605 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1606 b->donotstash = PETSC_FALSE; 1607 b->colmap = PETSC_NULL; 1608 b->garray = PETSC_NULL; 1609 b->roworiented = PETSC_TRUE; 1610 1611 #if defined(PETSC_USE_MAT_SINGLE) 1612 /* stuff for MatSetValues_XXX in single precision */ 1613 b->setvalueslen = 0; 1614 b->setvaluescopy = PETSC_NULL; 1615 #endif 1616 1617 /* stuff used in block assembly */ 1618 b->barray = 0; 1619 1620 /* stuff used for matrix vector multiply */ 1621 b->lvec = 0; 1622 b->Mvctx = 0; 1623 b->slvec0 = 0; 1624 b->slvec0b = 0; 1625 b->slvec1 = 0; 1626 b->slvec1a = 0; 1627 b->slvec1b = 0; 1628 b->sMvctx = 0; 1629 1630 /* stuff for MatGetRow() */ 1631 b->rowindices = 0; 1632 b->rowvalues = 0; 1633 b->getrowactive = PETSC_FALSE; 1634 1635 /* hash table stuff */ 1636 b->ht = 0; 1637 b->hd = 0; 1638 b->ht_size = 0; 1639 b->ht_flag = PETSC_FALSE; 1640 b->ht_fact = 0; 1641 b->ht_total_ct = 0; 1642 b->ht_insert_ct = 0; 1643 1644 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1645 if (flg) { 1646 PetscReal fact = 1.39; 1647 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1648 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1649 if (fact <= 1.0) fact = 1.39; 1650 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1651 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1652 } 1653 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1654 "MatStoreValues_MPISBAIJ", 1655 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1657 "MatRetrieveValues_MPISBAIJ", 1658 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1660 "MatGetDiagonalBlock_MPISBAIJ", 1661 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 EXTERN_C_END 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1668 /*@C 1669 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1670 the user should preallocate the matrix storage by setting the parameters 1671 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1672 performance can be increased by more than a factor of 50. 1673 1674 Collective on Mat 1675 1676 Input Parameters: 1677 + A - the matrix 1678 . bs - size of blockk 1679 . d_nz - number of block nonzeros per block row in diagonal portion of local 1680 submatrix (same for all local rows) 1681 . d_nnz - array containing the number of block nonzeros in the various block rows 1682 in the upper triangular and diagonal part of the in diagonal portion of the local 1683 (possibly different for each block row) or PETSC_NULL. You must leave room 1684 for the diagonal entry even if it is zero. 1685 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1686 submatrix (same for all local rows). 1687 - o_nnz - array containing the number of nonzeros in the various block rows of the 1688 off-diagonal portion of the local submatrix (possibly different for 1689 each block row) or PETSC_NULL. 1690 1691 1692 Options Database Keys: 1693 . -mat_no_unroll - uses code that does not unroll the loops in the 1694 block calculations (much slower) 1695 . -mat_block_size - size of the blocks to use 1696 1697 Notes: 1698 1699 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1700 than it must be used on all processors that share the object for that argument. 1701 1702 Storage Information: 1703 For a square global matrix we define each processor's diagonal portion 1704 to be its local rows and the corresponding columns (a square submatrix); 1705 each processor's off-diagonal portion encompasses the remainder of the 1706 local matrix (a rectangular submatrix). 1707 1708 The user can specify preallocated storage for the diagonal part of 1709 the local submatrix with either d_nz or d_nnz (not both). Set 1710 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1711 memory allocation. Likewise, specify preallocated storage for the 1712 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1713 1714 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1715 the figure below we depict these three local rows and all columns (0-11). 1716 1717 .vb 1718 0 1 2 3 4 5 6 7 8 9 10 11 1719 ------------------- 1720 row 3 | o o o d d d o o o o o o 1721 row 4 | o o o d d d o o o o o o 1722 row 5 | o o o d d d o o o o o o 1723 ------------------- 1724 .ve 1725 1726 Thus, any entries in the d locations are stored in the d (diagonal) 1727 submatrix, and any entries in the o locations are stored in the 1728 o (off-diagonal) submatrix. Note that the d matrix is stored in 1729 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1730 1731 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1732 plus the diagonal part of the d matrix, 1733 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1734 In general, for PDE problems in which most nonzeros are near the diagonal, 1735 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1736 or you will get TERRIBLE performance; see the users' manual chapter on 1737 matrices. 1738 1739 Level: intermediate 1740 1741 .keywords: matrix, block, aij, compressed row, sparse, parallel 1742 1743 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1744 @*/ 1745 1746 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1747 { 1748 Mat_MPISBAIJ *b; 1749 int ierr,i,mbs,Mbs; 1750 PetscTruth flg2; 1751 1752 PetscFunctionBegin; 1753 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr); 1754 if (!flg2) PetscFunctionReturn(0); 1755 1756 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1757 1758 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1759 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1760 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1761 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1762 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1763 if (d_nnz) { 1764 for (i=0; i<B->m/bs; i++) { 1765 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1766 } 1767 } 1768 if (o_nnz) { 1769 for (i=0; i<B->m/bs; i++) { 1770 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1771 } 1772 } 1773 B->preallocated = PETSC_TRUE; 1774 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1775 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1776 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1777 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1778 1779 b = (Mat_MPISBAIJ*)B->data; 1780 mbs = B->m/bs; 1781 Mbs = B->M/bs; 1782 if (mbs*bs != B->m) { 1783 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1784 } 1785 1786 b->bs = bs; 1787 b->bs2 = bs*bs; 1788 b->mbs = mbs; 1789 b->nbs = mbs; 1790 b->Mbs = Mbs; 1791 b->Nbs = Mbs; 1792 1793 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1794 b->rowners[0] = 0; 1795 for (i=2; i<=b->size; i++) { 1796 b->rowners[i] += b->rowners[i-1]; 1797 } 1798 b->rstart = b->rowners[b->rank]; 1799 b->rend = b->rowners[b->rank+1]; 1800 b->cstart = b->rstart; 1801 b->cend = b->rend; 1802 for (i=0; i<=b->size; i++) { 1803 b->rowners_bs[i] = b->rowners[i]*bs; 1804 } 1805 b->rstart_bs = b-> rstart*bs; 1806 b->rend_bs = b->rend*bs; 1807 1808 b->cstart_bs = b->cstart*bs; 1809 b->cend_bs = b->cend*bs; 1810 1811 1812 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1813 PetscLogObjectParent(B,b->A); 1814 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1815 PetscLogObjectParent(B,b->B); 1816 1817 /* build cache for off array entries formed */ 1818 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1819 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatCreateMPISBAIJ" 1825 /*@C 1826 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1827 (block compressed row). For good matrix assembly performance 1828 the user should preallocate the matrix storage by setting the parameters 1829 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1830 performance can be increased by more than a factor of 50. 1831 1832 Collective on MPI_Comm 1833 1834 Input Parameters: 1835 + comm - MPI communicator 1836 . bs - size of blockk 1837 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1838 This value should be the same as the local size used in creating the 1839 y vector for the matrix-vector product y = Ax. 1840 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1841 This value should be the same as the local size used in creating the 1842 x vector for the matrix-vector product y = Ax. 1843 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1844 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1845 . d_nz - number of block nonzeros per block row in diagonal portion of local 1846 submatrix (same for all local rows) 1847 . d_nnz - array containing the number of block nonzeros in the various block rows 1848 in the upper triangular portion of the in diagonal portion of the local 1849 (possibly different for each block block row) or PETSC_NULL. 1850 You must leave room for the diagonal entry even if it is zero. 1851 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1852 submatrix (same for all local rows). 1853 - o_nnz - array containing the number of nonzeros in the various block rows of the 1854 off-diagonal portion of the local submatrix (possibly different for 1855 each block row) or PETSC_NULL. 1856 1857 Output Parameter: 1858 . A - the matrix 1859 1860 Options Database Keys: 1861 . -mat_no_unroll - uses code that does not unroll the loops in the 1862 block calculations (much slower) 1863 . -mat_block_size - size of the blocks to use 1864 . -mat_mpi - use the parallel matrix data structures even on one processor 1865 (defaults to using SeqBAIJ format on one processor) 1866 1867 Notes: 1868 The user MUST specify either the local or global matrix dimensions 1869 (possibly both). 1870 1871 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1872 than it must be used on all processors that share the object for that argument. 1873 1874 Storage Information: 1875 For a square global matrix we define each processor's diagonal portion 1876 to be its local rows and the corresponding columns (a square submatrix); 1877 each processor's off-diagonal portion encompasses the remainder of the 1878 local matrix (a rectangular submatrix). 1879 1880 The user can specify preallocated storage for the diagonal part of 1881 the local submatrix with either d_nz or d_nnz (not both). Set 1882 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1883 memory allocation. Likewise, specify preallocated storage for the 1884 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1885 1886 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1887 the figure below we depict these three local rows and all columns (0-11). 1888 1889 .vb 1890 0 1 2 3 4 5 6 7 8 9 10 11 1891 ------------------- 1892 row 3 | o o o d d d o o o o o o 1893 row 4 | o o o d d d o o o o o o 1894 row 5 | o o o d d d o o o o o o 1895 ------------------- 1896 .ve 1897 1898 Thus, any entries in the d locations are stored in the d (diagonal) 1899 submatrix, and any entries in the o locations are stored in the 1900 o (off-diagonal) submatrix. Note that the d matrix is stored in 1901 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1902 1903 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1904 plus the diagonal part of the d matrix, 1905 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1906 In general, for PDE problems in which most nonzeros are near the diagonal, 1907 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1908 or you will get TERRIBLE performance; see the users' manual chapter on 1909 matrices. 1910 1911 Level: intermediate 1912 1913 .keywords: matrix, block, aij, compressed row, sparse, parallel 1914 1915 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1916 @*/ 1917 1918 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1919 { 1920 int ierr,size; 1921 1922 PetscFunctionBegin; 1923 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1924 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1925 if (size > 1) { 1926 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1927 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1928 } else { 1929 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1930 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1938 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1939 { 1940 Mat mat; 1941 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1942 int ierr,len=0; 1943 1944 PetscFunctionBegin; 1945 *newmat = 0; 1946 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1947 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1948 mat->preallocated = PETSC_TRUE; 1949 a = (Mat_MPISBAIJ*)mat->data; 1950 a->bs = oldmat->bs; 1951 a->bs2 = oldmat->bs2; 1952 a->mbs = oldmat->mbs; 1953 a->nbs = oldmat->nbs; 1954 a->Mbs = oldmat->Mbs; 1955 a->Nbs = oldmat->Nbs; 1956 1957 a->rstart = oldmat->rstart; 1958 a->rend = oldmat->rend; 1959 a->cstart = oldmat->cstart; 1960 a->cend = oldmat->cend; 1961 a->size = oldmat->size; 1962 a->rank = oldmat->rank; 1963 a->donotstash = oldmat->donotstash; 1964 a->roworiented = oldmat->roworiented; 1965 a->rowindices = 0; 1966 a->rowvalues = 0; 1967 a->getrowactive = PETSC_FALSE; 1968 a->barray = 0; 1969 a->rstart_bs = oldmat->rstart_bs; 1970 a->rend_bs = oldmat->rend_bs; 1971 a->cstart_bs = oldmat->cstart_bs; 1972 a->cend_bs = oldmat->cend_bs; 1973 1974 /* hash table stuff */ 1975 a->ht = 0; 1976 a->hd = 0; 1977 a->ht_size = 0; 1978 a->ht_flag = oldmat->ht_flag; 1979 a->ht_fact = oldmat->ht_fact; 1980 a->ht_total_ct = 0; 1981 a->ht_insert_ct = 0; 1982 1983 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1984 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1985 a->cowners = a->rowners + a->size + 2; 1986 a->rowners_bs = a->cowners + a->size + 2; 1987 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1988 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1989 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1990 if (oldmat->colmap) { 1991 #if defined (PETSC_USE_CTABLE) 1992 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1993 #else 1994 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1995 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1996 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 1997 #endif 1998 } else a->colmap = 0; 1999 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2000 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2001 PetscLogObjectMemory(mat,len*sizeof(int)); 2002 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2003 } else a->garray = 0; 2004 2005 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2006 PetscLogObjectParent(mat,a->lvec); 2007 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2008 2009 PetscLogObjectParent(mat,a->Mvctx); 2010 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2011 PetscLogObjectParent(mat,a->A); 2012 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2013 PetscLogObjectParent(mat,a->B); 2014 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2015 *newmat = mat; 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #include "petscsys.h" 2020 2021 EXTERN_C_BEGIN 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatLoad_MPISBAIJ" 2024 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2025 { 2026 Mat A; 2027 int i,nz,ierr,j,rstart,rend,fd; 2028 PetscScalar *vals,*buf; 2029 MPI_Comm comm = ((PetscObject)viewer)->comm; 2030 MPI_Status status; 2031 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2032 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2033 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2034 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2035 int dcount,kmax,k,nzcount,tmp; 2036 2037 PetscFunctionBegin; 2038 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2039 2040 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2041 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2042 if (!rank) { 2043 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2044 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2045 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2046 if (header[3] < 0) { 2047 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2048 } 2049 } 2050 2051 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2052 M = header[1]; N = header[2]; 2053 2054 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2055 2056 /* 2057 This code adds extra rows to make sure the number of rows is 2058 divisible by the blocksize 2059 */ 2060 Mbs = M/bs; 2061 extra_rows = bs - M + bs*(Mbs); 2062 if (extra_rows == bs) extra_rows = 0; 2063 else Mbs++; 2064 if (extra_rows &&!rank) { 2065 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2066 } 2067 2068 /* determine ownership of all rows */ 2069 mbs = Mbs/size + ((Mbs % size) > rank); 2070 m = mbs*bs; 2071 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2072 browners = rowners + size + 1; 2073 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2074 rowners[0] = 0; 2075 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2076 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2077 rstart = rowners[rank]; 2078 rend = rowners[rank+1]; 2079 2080 /* distribute row lengths to all processors */ 2081 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2082 if (!rank) { 2083 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2084 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2085 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2086 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2087 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2088 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2089 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2090 } else { 2091 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2092 } 2093 2094 if (!rank) { /* procs[0] */ 2095 /* calculate the number of nonzeros on each processor */ 2096 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2097 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2098 for (i=0; i<size; i++) { 2099 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2100 procsnz[i] += rowlengths[j]; 2101 } 2102 } 2103 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2104 2105 /* determine max buffer needed and allocate it */ 2106 maxnz = 0; 2107 for (i=0; i<size; i++) { 2108 maxnz = PetscMax(maxnz,procsnz[i]); 2109 } 2110 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2111 2112 /* read in my part of the matrix column indices */ 2113 nz = procsnz[0]; 2114 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2115 mycols = ibuf; 2116 if (size == 1) nz -= extra_rows; 2117 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2118 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2119 2120 /* read in every ones (except the last) and ship off */ 2121 for (i=1; i<size-1; i++) { 2122 nz = procsnz[i]; 2123 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2124 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2125 } 2126 /* read in the stuff for the last proc */ 2127 if (size != 1) { 2128 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2129 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2130 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2131 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2132 } 2133 ierr = PetscFree(cols);CHKERRQ(ierr); 2134 } else { /* procs[i], i>0 */ 2135 /* determine buffer space needed for message */ 2136 nz = 0; 2137 for (i=0; i<m; i++) { 2138 nz += locrowlens[i]; 2139 } 2140 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2141 mycols = ibuf; 2142 /* receive message of column indices*/ 2143 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2144 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2145 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2146 } 2147 2148 /* loop over local rows, determining number of off diagonal entries */ 2149 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2150 odlens = dlens + (rend-rstart); 2151 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2152 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2153 masked1 = mask + Mbs; 2154 masked2 = masked1 + Mbs; 2155 rowcount = 0; nzcount = 0; 2156 for (i=0; i<mbs; i++) { 2157 dcount = 0; 2158 odcount = 0; 2159 for (j=0; j<bs; j++) { 2160 kmax = locrowlens[rowcount]; 2161 for (k=0; k<kmax; k++) { 2162 tmp = mycols[nzcount++]/bs; /* block col. index */ 2163 if (!mask[tmp]) { 2164 mask[tmp] = 1; 2165 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2166 else masked1[dcount++] = tmp; /* entry in diag portion */ 2167 } 2168 } 2169 rowcount++; 2170 } 2171 2172 dlens[i] = dcount; /* d_nzz[i] */ 2173 odlens[i] = odcount; /* o_nzz[i] */ 2174 2175 /* zero out the mask elements we set */ 2176 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2177 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2178 } 2179 2180 /* create our matrix */ 2181 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2182 CHKERRQ(ierr); 2183 A = *newmat; 2184 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2185 2186 if (!rank) { 2187 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2188 /* read in my part of the matrix numerical values */ 2189 nz = procsnz[0]; 2190 vals = buf; 2191 mycols = ibuf; 2192 if (size == 1) nz -= extra_rows; 2193 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2194 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2195 2196 /* insert into matrix */ 2197 jj = rstart*bs; 2198 for (i=0; i<m; i++) { 2199 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2200 mycols += locrowlens[i]; 2201 vals += locrowlens[i]; 2202 jj++; 2203 } 2204 2205 /* read in other processors (except the last one) and ship out */ 2206 for (i=1; i<size-1; i++) { 2207 nz = procsnz[i]; 2208 vals = buf; 2209 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2210 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2211 } 2212 /* the last proc */ 2213 if (size != 1){ 2214 nz = procsnz[i] - extra_rows; 2215 vals = buf; 2216 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2217 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2218 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2219 } 2220 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2221 2222 } else { 2223 /* receive numeric values */ 2224 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2225 2226 /* receive message of values*/ 2227 vals = buf; 2228 mycols = ibuf; 2229 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2230 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2231 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2232 2233 /* insert into matrix */ 2234 jj = rstart*bs; 2235 for (i=0; i<m; i++) { 2236 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2237 mycols += locrowlens[i]; 2238 vals += locrowlens[i]; 2239 jj++; 2240 } 2241 } 2242 2243 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2244 ierr = PetscFree(buf);CHKERRQ(ierr); 2245 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2246 ierr = PetscFree(rowners);CHKERRQ(ierr); 2247 ierr = PetscFree(dlens);CHKERRQ(ierr); 2248 ierr = PetscFree(mask);CHKERRQ(ierr); 2249 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 PetscFunctionReturn(0); 2252 } 2253 EXTERN_C_END 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2257 /*@ 2258 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2259 2260 Input Parameters: 2261 . mat - the matrix 2262 . fact - factor 2263 2264 Collective on Mat 2265 2266 Level: advanced 2267 2268 Notes: 2269 This can also be set by the command line option: -mat_use_hash_table fact 2270 2271 .keywords: matrix, hashtable, factor, HT 2272 2273 .seealso: MatSetOption() 2274 @*/ 2275 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2276 { 2277 PetscFunctionBegin; 2278 SETERRQ(1,"Function not yet written for SBAIJ format"); 2279 /* PetscFunctionReturn(0); */ 2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2284 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2285 { 2286 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2287 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2288 PetscReal atmp; 2289 PetscReal *work,*svalues,*rvalues; 2290 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2291 int rank,size,*rowners_bs,dest,count,source; 2292 PetscScalar *va; 2293 MatScalar *ba; 2294 MPI_Status stat; 2295 2296 PetscFunctionBegin; 2297 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2298 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2299 2300 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 2301 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 2302 2303 bs = a->bs; 2304 mbs = a->mbs; 2305 Mbs = a->Mbs; 2306 ba = b->a; 2307 bi = b->i; 2308 bj = b->j; 2309 /* 2310 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2311 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2312 */ 2313 2314 /* find ownerships */ 2315 rowners_bs = a->rowners_bs; 2316 /* 2317 if (!rank){ 2318 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2319 } 2320 */ 2321 2322 /* each proc creates an array to be distributed */ 2323 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2324 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2325 2326 /* row_max for B */ 2327 if (rank != size-1){ 2328 for (i=0; i<mbs; i++) { 2329 ncols = bi[1] - bi[0]; bi++; 2330 brow = bs*i; 2331 for (j=0; j<ncols; j++){ 2332 bcol = bs*(*bj); 2333 for (kcol=0; kcol<bs; kcol++){ 2334 col = bcol + kcol; /* local col index */ 2335 col += rowners_bs[rank+1]; /* global col index */ 2336 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2337 for (krow=0; krow<bs; krow++){ 2338 atmp = PetscAbsScalar(*ba); ba++; 2339 row = brow + krow; /* local row index */ 2340 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2341 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2342 if (work[col] < atmp) work[col] = atmp; 2343 } 2344 } 2345 bj++; 2346 } 2347 } 2348 /* 2349 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2350 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2351 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2352 */ 2353 2354 /* send values to its owners */ 2355 for (dest=rank+1; dest<size; dest++){ 2356 svalues = work + rowners_bs[dest]; 2357 count = rowners_bs[dest+1]-rowners_bs[dest]; 2358 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr); 2359 /* 2360 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2361 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2362 */ 2363 } 2364 } 2365 2366 /* receive values */ 2367 if (rank){ 2368 rvalues = work; 2369 count = rowners_bs[rank+1]-rowners_bs[rank]; 2370 for (source=0; source<rank; source++){ 2371 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr); 2372 /* process values */ 2373 for (i=0; i<count; i++){ 2374 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2375 } 2376 /* 2377 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2378 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2379 */ 2380 } 2381 } 2382 2383 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2384 ierr = PetscFree(work);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatRelax_MPISBAIJ" 2390 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2391 { 2392 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2393 int ierr,mbs=mat->mbs,bs=mat->bs; 2394 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2395 Vec bb1; 2396 2397 PetscFunctionBegin; 2398 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2399 if (bs > 1) 2400 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2401 2402 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2403 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2404 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2405 its--; 2406 } 2407 2408 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2409 while (its--){ 2410 2411 /* lower triangular part: slvec0b = - B^T*xx */ 2412 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2413 2414 /* copy xx into slvec0a */ 2415 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2416 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2417 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2418 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2419 2420 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2421 2422 /* copy bb into slvec1a */ 2423 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2424 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2425 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2426 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2427 2428 /* set slvec1b = 0 */ 2429 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2430 2431 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2432 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2433 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2434 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2435 2436 /* upper triangular part: bb1 = bb1 - B*x */ 2437 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2438 2439 /* local diagonal sweep */ 2440 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2441 } 2442 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2443 } else { 2444 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2445 } 2446 PetscFunctionReturn(0); 2447 } 2448 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2451 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2452 { 2453 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2454 int ierr; 2455 PetscScalar mone=-1.0; 2456 Vec lvec1,bb1; 2457 2458 PetscFunctionBegin; 2459 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2460 if (mat->bs > 1) 2461 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2462 2463 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2464 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2465 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2466 its--; 2467 } 2468 2469 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2470 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2471 while (its--){ 2472 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2473 2474 /* lower diagonal part: bb1 = bb - B^T*xx */ 2475 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2476 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2477 2478 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2479 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2480 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2481 2482 /* upper diagonal part: bb1 = bb1 - B*x */ 2483 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2484 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2485 2486 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2487 2488 /* diagonal sweep */ 2489 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2490 } 2491 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2492 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2493 } else { 2494 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2495 } 2496 PetscFunctionReturn(0); 2497 } 2498 2499