1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 
*/
#if defined(PETSC_USE_MAT_SINGLE)
/* Matrix stored in single precision: the ..._MatScalar() entry points are
   real functions that accept already-converted single precision values. */
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
/* MatScalar == PetscScalar: no conversion needed, the ..._MatScalar() names
   are simply aliases for the ordinary MatSetValues routines. */
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
/*
   MatStoreValues_MPISBAIJ - Saves the numerical values of both the diagonal
   (A) and off-diagonal (B) sequential blocks so they can later be restored
   with MatRetrieveValues_MPISBAIJ().
*/
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
/*
   MatRetrieveValues_MPISBAIJ - Restores the numerical values of the A and B
   blocks previously saved with MatStoreValues_MPISBAIJ().
*/
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
80 */ 81 #undef __FUNCT__ 82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 83 static int CreateColmap_MPISBAIJ_Private(Mat mat) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"Function not yet written for SBAIJ format"); 87 /* PetscFunctionReturn(0); */ 88 } 89 90 #define CHUNKSIZE 10 91 92 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 93 { \ 94 \ 95 brow = row/bs; \ 96 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 97 rmax = aimax[brow]; nrow = ailen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto a_noinsert; \ 113 } \ 114 } \ 115 if (a->nonew == 1) goto a_noinsert; \ 116 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 117 if (nrow >= rmax) { \ 118 /* there is no extra room in row, therefore enlarge */ \ 119 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 120 MatScalar *new_a; \ 121 \ 122 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 123 \ 124 /* malloc new storage space */ \ 125 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 126 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 127 new_j = (int*)(new_a + bs2*new_nz); \ 128 new_i = new_j + new_nz; \ 129 \ 130 /* copy over old data into new slots */ \ 131 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 132 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 133 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 134 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 135 ierr = 
PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 137 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 138 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 139 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 140 /* free up old matrix storage */ \ 141 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 142 if (!a->singlemalloc) { \ 143 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 144 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 145 } \ 146 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 147 a->singlemalloc = PETSC_TRUE; \ 148 \ 149 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 150 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 151 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 152 a->s_maxnz += bs2*CHUNKSIZE; \ 153 a->reallocs++; \ 154 a->s_nz++; \ 155 } \ 156 N = nrow++ - 1; \ 157 /* shift up all the later entries in this row */ \ 158 for (ii=N; ii>=_i; ii--) { \ 159 rp[ii+1] = rp[ii]; \ 160 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 161 } \ 162 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 163 rp[_i] = bcol; \ 164 ap[bs2*_i + bs*cidx + ridx] = value; \ 165 a_noinsert:; \ 166 ailen[brow] = nrow; \ 167 } 168 #ifndef MatSetValues_SeqBAIJ_B_Private 169 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 170 { \ 171 brow = row/bs; \ 172 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173 rmax = bimax[brow]; nrow = bilen[brow]; \ 174 bcol = col/bs; \ 175 ridx = row % bs; cidx = col % bs; \ 176 low = 0; high = nrow; \ 177 while (high-low > 3) { \ 178 t = (low+high)/2; \ 179 if (rp[t] > bcol) high = t; \ 180 else low = t; \ 181 } \ 182 for (_i=low; _i<high; _i++) { \ 183 if (rp[_i] > bcol) break; \ 184 if (rp[_i] == bcol) { \ 185 bap = ap + bs2*_i + bs*cidx + 
ridx; \ 186 if (addv == ADD_VALUES) *bap += value; \ 187 else *bap = value; \ 188 goto b_noinsert; \ 189 } \ 190 } \ 191 if (b->nonew == 1) goto b_noinsert; \ 192 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 193 if (nrow >= rmax) { \ 194 /* there is no extra room in row, therefore enlarge */ \ 195 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 196 MatScalar *new_a; \ 197 \ 198 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 199 \ 200 /* malloc new storage space */ \ 201 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 202 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 203 new_j = (int*)(new_a + bs2*new_nz); \ 204 new_i = new_j + new_nz; \ 205 \ 206 /* copy over old data into new slots */ \ 207 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 208 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 209 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 210 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 211 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 213 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 214 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 215 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 216 /* free up old matrix storage */ \ 217 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 218 if (!b->singlemalloc) { \ 219 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 220 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 221 } \ 222 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 223 b->singlemalloc = PETSC_TRUE; \ 224 \ 225 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 226 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 227 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + 
bs2*sizeof(MatScalar))); \ 228 b->maxnz += bs2*CHUNKSIZE; \ 229 b->reallocs++; \ 230 b->nz++; \ 231 } \ 232 N = nrow++ - 1; \ 233 /* shift up all the later entries in this row */ \ 234 for (ii=N; ii>=_i; ii--) { \ 235 rp[ii+1] = rp[ii]; \ 236 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 237 } \ 238 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 239 rp[_i] = bcol; \ 240 ap[bs2*_i + bs*cidx + ridx] = value; \ 241 b_noinsert:; \ 242 bilen[brow] = nrow; \ 243 } 244 #endif 245 246 #if defined(PETSC_USE_MAT_SINGLE) 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatSetValues_MPISBAIJ" 249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 250 { 251 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 252 int ierr,i,N = m*n; 253 MatScalar *vsingle; 254 255 PetscFunctionBegin; 256 if (N > b->setvalueslen) { 257 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 258 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 259 b->setvalueslen = N; 260 } 261 vsingle = b->setvaluescopy; 262 263 for (i=0; i<N; i++) { 264 vsingle[i] = v[i]; 265 } 266 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 273 { 274 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 275 int ierr,i,N = m*n*b->bs2; 276 MatScalar *vsingle; 277 278 PetscFunctionBegin; 279 if (N > b->setvalueslen) { 280 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 281 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 282 b->setvalueslen = N; 283 } 284 vsingle = b->setvaluescopy; 285 for (i=0; i<N; i++) { 286 vsingle[i] = v[i]; 287 } 288 ierr = 
MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 295 { 296 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 297 int ierr,i,N = m*n; 298 MatScalar *vsingle; 299 300 PetscFunctionBegin; 301 SETERRQ(1,"Function not yet written for SBAIJ format"); 302 /* PetscFunctionReturn(0); */ 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 308 { 309 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 310 int ierr,i,N = m*n*b->bs2; 311 MatScalar *vsingle; 312 313 PetscFunctionBegin; 314 SETERRQ(1,"Function not yet written for SBAIJ format"); 315 /* PetscFunctionReturn(0); */ 316 } 317 #endif 318 319 /* Only add/insert a(i,j) with i<=j (blocks). 320 Any a(i,j) with i>j input by user is ingored. 
321 */ 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSetValues_MPIBAIJ" 324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 325 { 326 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 327 MatScalar value; 328 PetscTruth roworiented = baij->roworiented; 329 int ierr,i,j,row,col; 330 int rstart_orig=baij->rstart_bs; 331 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 332 int cend_orig=baij->cend_bs,bs=baij->bs; 333 334 /* Some Variables required in the macro */ 335 Mat A = baij->A; 336 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 337 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 338 MatScalar *aa=a->a; 339 340 Mat B = baij->B; 341 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 342 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 343 MatScalar *ba=b->a; 344 345 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 346 int low,high,t,ridx,cidx,bs2=a->bs2; 347 MatScalar *ap,*bap; 348 349 /* for stash */ 350 int n_loc, *in_loc=0; 351 MatScalar *v_loc=0; 352 353 PetscFunctionBegin; 354 355 if(!baij->donotstash){ 356 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 357 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 358 } 359 360 for (i=0; i<m; i++) { 361 if (im[i] < 0) continue; 362 #if defined(PETSC_USE_BOPT_g) 363 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 364 #endif 365 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 366 row = im[i] - rstart_orig; /* local row index */ 367 for (j=0; j<n; j++) { 368 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 369 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 370 col = in[j] - cstart_orig; /* local col index */ 371 brow = row/bs; bcol = col/bs; 372 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 373 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 374 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 375 /* ierr = 
MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 376 } else if (in[j] < 0) continue; 377 #if defined(PETSC_USE_BOPT_g) 378 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 379 #endif 380 else { /* off-diag entry (B) */ 381 if (mat->was_assembled) { 382 if (!baij->colmap) { 383 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined (PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 387 col = col - 1; 388 #else 389 col = baij->colmap[in[j]/bs] - 1; 390 #endif 391 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 392 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 393 col = in[j]; 394 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 395 B = baij->B; 396 b = (Mat_SeqBAIJ*)(B)->data; 397 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 398 ba=b->a; 399 } else col += in[j]%bs; 400 } else col = in[j]; 401 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 402 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 403 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 404 } 405 } 406 } else { /* off processor entry */ 407 if (!baij->donotstash) { 408 n_loc = 0; 409 for (j=0; j<n; j++){ 410 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 411 in_loc[n_loc] = in[j]; 412 if (roworiented) { 413 v_loc[n_loc] = v[i*n+j]; 414 } else { 415 v_loc[n_loc] = v[j*m+i]; 416 } 417 n_loc++; 418 } 419 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 420 } 421 } 422 } 423 424 if(!baij->donotstash){ 425 ierr = PetscFree(in_loc);CHKERRQ(ierr); 426 ierr = PetscFree(v_loc);CHKERRQ(ierr); 427 } 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 434 { 435 PetscFunctionBegin; 436 
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

/* multiplicative hash on the golden ratio; tmp is a scratch variable */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
/* Hash-table insertion (MatScalar values): not implemented for SBAIJ. */
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
/* Blocked hash-table insertion (MatScalar values): not implemented for SBAIJ. */
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/*
   MatGetValues_MPISBAIJ - fetches entries (scalar indices) from the locally
   owned rows; rows owned by other processes raise PETSC_ERR_SUP.  Entries
   that fall in the off-diagonal block are located through colmap/garray;
   positions not stored return 0.0.
*/
int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* not in the local off-diagonal block: the entry is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = data + idxn[j]%bs;
            ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
/*
   MatNorm_MPISBAIJ - Frobenius norm only (in parallel).  Because only the
   upper triangle of the symmetric matrix is stored, the off-diagonal part
   B is counted twice in the reduction below.
*/
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;  /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;  /* square of norm(B); pointer back to start */
      /*
      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
      PetscSynchronizedFlush(PETSC_COMM_WORLD); */

      /* B appears twice (upper and, implicitly, lower triangle) */
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr =
PetscFree(lnorm2);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 /* 545 Creates the hash table, and sets the table 546 This table is created only once. 547 If new entried need to be added to the matrix 548 then the hash table has to be destroyed and 549 recreated. 550 */ 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 553 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 554 { 555 PetscFunctionBegin; 556 SETERRQ(1,"Function not yet written for SBAIJ format"); 557 /* PetscFunctionReturn(0); */ 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 562 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 563 { 564 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 565 int ierr,nstash,reallocs; 566 InsertMode addv; 567 568 PetscFunctionBegin; 569 if (baij->donotstash) { 570 PetscFunctionReturn(0); 571 } 572 573 /* make sure all processors are either in INSERTMODE or ADDMODE */ 574 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 575 if (addv == (ADD_VALUES|INSERT_VALUES)) { 576 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 577 } 578 mat->insertmode = addv; /* in case this processor had no cache */ 579 580 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 581 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 582 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 583 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 584 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 585 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ 
"MatAssemblyEnd_MPISBAIJ"
/*
   MatAssemblyEnd_MPISBAIJ - drains the scalar and block stashes (inserting
   the received values locally), assembles the sequential A and B blocks,
   handles disassembly coordination across processes, and performs one-time
   final-assembly setup.
*/
int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
  int          i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int          *row,*col,other_disassembled;
  PetscTruth   r1,r2,r3;     /* saved row-orientation flags */
  MatScalar    *val;
  InsertMode   addv = mat->insertmode;
#if defined(PETSC_HAVE_SPOOLES)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;

  if (!baij->donotstash) {
    /* receive and insert stashed scalar entries until no messages remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
      PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block entry carries bs2 values */
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* debug builds: report hash table search statistics and reset counters */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with hashing requested: build the table and switch
     the set-values function pointers to the hash table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  /* free the cached MatGetRow() workspace; it is rebuilt on demand */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPISBAIJ_ASCIIorDraworSocket - viewer back end.  INFO formats
   print per-process statistics; otherwise (for more than one process) the
   whole matrix is gathered onto the first process and viewed there.
*/
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;
  PetscViewer  sviewer;
  PetscViewerFormat format;

PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      /* per-process nonzero/memory statistics for mat and its A/B blocks */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    int          M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* rank 0 gets all M x N rows/cols of the gathered copy; others get none */
    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global scalar rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local off-diagonal block columns
       back to global block columns */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
/*
   MatView_MPISBAIJ - dispatches ASCII/draw/socket/binary viewers to the
   common back end; any other viewer type is an error.
*/
int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   MatDestroy_MPISBAIJ - releases all parallel SBAIJ storage: stashes, the
   sequential A/B blocks, column map, scatter contexts and work vectors.
*/
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr =
PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  /* slvec0b/slvec1a/slvec1b are sub-vectors created together with slvec0/slvec1 */
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)    {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray)    {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd)        {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
/*
   MatMult_MPISBAIJ - y = A*x for a symmetric block matrix stored as upper
   triangle: the diagonal block multiplies directly, the off-diagonal block
   contributes both B*x (superdiagonal) and B^T*x (subdiagonal), combined
   through the single scatter sMvctx on the work vectors slvec0/slvec1.
*/
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
  PetscSynchronizedFlush(PETSC_COMM_WORLD);
  */
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    /* NOTE(review): "parition" typo is in the original error string; left as-is */
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part: B^T * x into the "b" half of slvec0 */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the leading part of slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part: yy = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
/*
   MatMult_MPISBAIJ_2comm - Variant of MatMult using two scatters (a forward
   scatter for the superdiagonal and a reverse ADD_VALUES scatter for the
   subdiagonal B^T contribution) instead of the combined sMvctx scatter.
*/
int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part: add B^T*x back into owning processes */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
/*
   MatMultAdd_MPISBAIJ - z = A*x + y; same communication pattern as
   MatMult_MPISBAIJ with the diagonal part done as a multadd.
*/
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,mbs=a->mbs,bs=a->bs;
  PetscScalar 
*x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PETSC_COMM_WORLD);
  */
  /* diagonal part: slvec1a = A*x + y */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the leading part of slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   MatMultAdd_MPISBAIJ_2comm - z = A*x + y using the two-scatter scheme
   (forward INSERT scatter plus reverse ADD scatter), mirroring
   MatMult_MPISBAIJ_2comm.
*/
int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
/* MatMultTranspose is meaningless for a symmetric matrix: always errors. */
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
/* MatMultTransposeAdd is meaningless for a symmetric matrix: always errors. */
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}

/*
     This only works correctly for square matrices where the subblock A->A is the
     diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
/* MatScale_MPISBAIJ - scales both the diagonal and off-diagonal local blocks by *aa. */
int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   MatGetRow_MPISBAIJ - Returns one (global) row of the matrix by merging the
   corresponding local rows of the A and B blocks, with column indices mapped
   back to global numbering and sorted by increasing column.
*/
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;
  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int          max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the value and the index scratch arrays */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column is left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* imark was not computed above (v == 0): find it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
/* MatRestoreRow_MPISBAIJ - Ends a MatGetRow access; only clears the active flag. */
int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
/* MatGetBlockSize_MPISBAIJ - Returns the block size bs. */
int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
/* MatZeroEntries_MPISBAIJ - Zeros both local blocks, keeping the nonzero pattern. */
int MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
/*
   MatGetInfo_MPISBAIJ - Gathers nonzero/memory statistics from the two local
   blocks and, for the global flavors, reduces them across the communicator.
*/
int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat          A = a->A,B = a->B;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = 
info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  /* NOTE(review): sizes below are taken from the *local* diagonal block A, not
     from matin; in particular columns_local = A->N (not A->n) looks suspicious
     compared with rows_local = A->m -- verify against the MPIBAIJ version */
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/*
   MatSetOption_MPISBAIJ - Applies a matrix option, forwarding to the local
   A and B blocks where appropriate; some options are recorded locally only
   (stashing, hash table) and some are ignored.
*/
int MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* forwarded unchanged to both local blocks */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
/* The transpose of a symmetric matrix is itself: just duplicate. */
int MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  int ierr;
  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   MatDiagonalScale_MPISBAIJ - Symmetric scaling D*A*D; to preserve symmetry
   the left and right scaling vectors must be the same vector object.
*/
int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat          a = baij->A,b = baij->B;
  int          ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. 
*/
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    /*} if (ll) { */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
    /* } */
    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* if (rr) { */
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPISBAIJ"
/*
   MatZeroRows_MPISBAIJ - Zeros the (global) rows listed in is, optionally
   placing diag on the diagonal.  Each process may name any global row, so the
   row indices are first routed to their owning processes with a hand-rolled
   MPI rendezvous (counts via MPI_Allreduce, then Isend/Irecv), after which
   each process zeros its own local rows.
*/
int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* owners[] holds block-row ownership ranges; scale by bs to compare scalar rows */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];       /* longest incoming message */
  nrecvs = work[size+rank];  /* number of incoming messages */
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to 
       the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was consumed above; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local scalar row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the Seq routine can set the diagonal in place */
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* fall back to one MatSetValues call per diagonal entry */
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
/*
   MatPrintHelp_MPISBAIJ - Prints the runtime options understood by the
   MPISBAIJ format; the body after the Seq help runs at most once per program.
*/
int MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm     comm = A->comm;
  static int   called = 0;
  int          ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
/* MatSetUnfactored_MPISBAIJ - Clears the factored state of the local diagonal block. */
int MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
/*
   MatEqual_MPISBAIJ - Compares the local A and B blocks of two MPISBAIJ
   matrices and reduces the result with a logical AND over the communicator,
   so *flag is collective-consistent.
*/
int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat          a,b,c,d;
  PetscTruth   flg;
  int          ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
/* Default preallocation used when the user never called MatMPISBAIJSetPreallocation. */
int MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
/* Operation table for MATMPISBAIJ; slot order is fixed by struct _MatOps. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPISBAIJ,
  MatGetRow_MPISBAIJ,
  MatRestoreRow_MPISBAIJ,
  MatMult_MPISBAIJ,
  MatMultAdd_MPISBAIJ,
  MatMultTranspose_MPISBAIJ,
  MatMultTransposeAdd_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatRelax_MPISBAIJ,
  MatTranspose_MPISBAIJ,
  MatGetInfo_MPISBAIJ,
  MatEqual_MPISBAIJ,
  MatGetDiagonal_MPISBAIJ,
  MatDiagonalScale_MPISBAIJ,
  MatNorm_MPISBAIJ,
  MatAssemblyBegin_MPISBAIJ,
  MatAssemblyEnd_MPISBAIJ,
  0,
  MatSetOption_MPISBAIJ,
  MatZeroEntries_MPISBAIJ,
  MatZeroRows_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPISBAIJ,
  MatIncreaseOverlap_MPISBAIJ,
  MatGetValues_MPISBAIJ,
  0,
  MatPrintHelp_MPISBAIJ,
  MatScale_MPISBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPISBAIJ,
  0,
  MatSetValuesBlocked_MPISBAIJ,
  0,
  0,
  0,
  MatGetPetscMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPISBAIJ};


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
/* Returns the local diagonal block directly (no copy is made). */
int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   MatCreate_MPISBAIJ - Type constructor registered for MATMPISBAIJ: allocates
   and zeros the Mat_MPISBAIJ context, installs the ops table, sets up the
   ownership arrays and stash, and composes the StoreValues/RetrieveValues/
   GetDiagonalBlock dynamic functions.
*/
int MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships: one allocation split three ways */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPISBAIJ",
                                     MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPISBAIJ",
                                     MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPISBAIJ",
                                     MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix 
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local 
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           in the upper triangular and diagonal part of the in diagonal portion of the local
           (possibly different for each block row) or PETSC_NULL.  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.


   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
-mat_block_size - size of the blocks to use 1751 1752 Notes: 1753 1754 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1755 than it must be used on all processors that share the object for that argument. 1756 1757 Storage Information: 1758 For a square global matrix we define each processor's diagonal portion 1759 to be its local rows and the corresponding columns (a square submatrix); 1760 each processor's off-diagonal portion encompasses the remainder of the 1761 local matrix (a rectangular submatrix). 1762 1763 The user can specify preallocated storage for the diagonal part of 1764 the local submatrix with either d_nz or d_nnz (not both). Set 1765 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1766 memory allocation. Likewise, specify preallocated storage for the 1767 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1768 1769 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1770 the figure below we depict these three local rows and all columns (0-11). 1771 1772 .vb 1773 0 1 2 3 4 5 6 7 8 9 10 11 1774 ------------------- 1775 row 3 | o o o d d d o o o o o o 1776 row 4 | o o o d d d o o o o o o 1777 row 5 | o o o d d d o o o o o o 1778 ------------------- 1779 .ve 1780 1781 Thus, any entries in the d locations are stored in the d (diagonal) 1782 submatrix, and any entries in the o locations are stored in the 1783 o (off-diagonal) submatrix. Note that the d matrix is stored in 1784 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1785 1786 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1787 plus the diagonal part of the d matrix, 1788 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1789 In general, for PDE problems in which most nonzeros are near the diagonal, 1790 one expects d_nz >> o_nz. 
For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/

int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPISBAIJ *b;
  int          ierr,i,mbs,Mbs;
  PetscTruth   flg2;

  PetscFunctionBegin;
  /* silently ignore matrices that are not of type MATMPISBAIJ */
  ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);

  /* the options database may override the block size passed in */
  ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  /* default preallocation when the caller gave PETSC_DECIDE/PETSC_DEFAULT */
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  /* validate the optional per-block-row preallocation arrays */
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;
  /* resolve PETSC_DECIDE/PETSC_DETERMINE into consistent local/global sizes */
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  /* NOTE(review): the column map is built from the ROW sizes (B->m,B->M);
     this appears to rely on SBAIJ matrices being square -- confirm */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->m/bs;
  Mbs = B->M/bs;
  if (mbs*bs != B->m) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
  }

  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = mbs;   /* square matrix: column block counts mirror the row counts */
  b->Mbs = Mbs;
  b->Nbs = Mbs;

  /* gather every process's block-row count and turn it into a prefix sum so
     that rowners[p]..rowners[p+1] is the block-row ownership range of proc p */
  ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  b->cstart = b->rstart;
  b->cend   = b->rend;
  /* point-index (block index times bs) versions of the ownership ranges */
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;

  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  /* diagonal part: sequential SBAIJ (stores the upper triangle);
     off-diagonal part: sequential BAIJ spanning the full global column space */
  ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
1895 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1896 This value should be the same as the local size used in creating the 1897 x vector for the matrix-vector product y = Ax. 1898 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1899 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1900 . d_nz - number of block nonzeros per block row in diagonal portion of local 1901 submatrix (same for all local rows) 1902 . d_nnz - array containing the number of block nonzeros in the various block rows 1903 in the upper triangular portion of the in diagonal portion of the local 1904 (possibly different for each block block row) or PETSC_NULL. 1905 You must leave room for the diagonal entry even if it is zero. 1906 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1907 submatrix (same for all local rows). 1908 - o_nnz - array containing the number of nonzeros in the various block rows of the 1909 off-diagonal portion of the local submatrix (possibly different for 1910 each block row) or PETSC_NULL. 1911 1912 Output Parameter: 1913 . A - the matrix 1914 1915 Options Database Keys: 1916 . -mat_no_unroll - uses code that does not unroll the loops in the 1917 block calculations (much slower) 1918 . -mat_block_size - size of the blocks to use 1919 . -mat_mpi - use the parallel matrix data structures even on one processor 1920 (defaults to using SeqBAIJ format on one processor) 1921 1922 Notes: 1923 The user MUST specify either the local or global matrix dimensions 1924 (possibly both). 1925 1926 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1927 than it must be used on all processors that share the object for that argument. 
1928 1929 Storage Information: 1930 For a square global matrix we define each processor's diagonal portion 1931 to be its local rows and the corresponding columns (a square submatrix); 1932 each processor's off-diagonal portion encompasses the remainder of the 1933 local matrix (a rectangular submatrix). 1934 1935 The user can specify preallocated storage for the diagonal part of 1936 the local submatrix with either d_nz or d_nnz (not both). Set 1937 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1938 memory allocation. Likewise, specify preallocated storage for the 1939 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1940 1941 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1942 the figure below we depict these three local rows and all columns (0-11). 1943 1944 .vb 1945 0 1 2 3 4 5 6 7 8 9 10 11 1946 ------------------- 1947 row 3 | o o o d d d o o o o o o 1948 row 4 | o o o d d d o o o o o o 1949 row 5 | o o o d d d o o o o o o 1950 ------------------- 1951 .ve 1952 1953 Thus, any entries in the d locations are stored in the d (diagonal) 1954 submatrix, and any entries in the o locations are stored in the 1955 o (off-diagonal) submatrix. Note that the d matrix is stored in 1956 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1957 1958 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1959 plus the diagonal part of the d matrix, 1960 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1961 In general, for PDE problems in which most nonzeros are near the diagonal, 1962 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1963 or you will get TERRIBLE performance; see the users' manual chapter on 1964 matrices. 
1965 1966 Level: intermediate 1967 1968 .keywords: matrix, block, aij, compressed row, sparse, parallel 1969 1970 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1971 @*/ 1972 1973 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1974 { 1975 int ierr,size; 1976 1977 PetscFunctionBegin; 1978 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1979 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1980 if (size > 1) { 1981 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1982 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1983 } else { 1984 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1985 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1993 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1994 { 1995 Mat mat; 1996 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1997 int ierr,len=0; 1998 1999 PetscFunctionBegin; 2000 *newmat = 0; 2001 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2002 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2003 mat->preallocated = PETSC_TRUE; 2004 a = (Mat_MPISBAIJ*)mat->data; 2005 a->bs = oldmat->bs; 2006 a->bs2 = oldmat->bs2; 2007 a->mbs = oldmat->mbs; 2008 a->nbs = oldmat->nbs; 2009 a->Mbs = oldmat->Mbs; 2010 a->Nbs = oldmat->Nbs; 2011 2012 a->rstart = oldmat->rstart; 2013 a->rend = oldmat->rend; 2014 a->cstart = oldmat->cstart; 2015 a->cend = oldmat->cend; 2016 a->size = oldmat->size; 2017 a->rank = oldmat->rank; 2018 a->donotstash = oldmat->donotstash; 2019 a->roworiented = oldmat->roworiented; 2020 a->rowindices = 0; 2021 a->rowvalues = 0; 2022 a->getrowactive = PETSC_FALSE; 2023 a->barray = 0; 2024 a->rstart_bs = oldmat->rstart_bs; 2025 a->rend_bs = oldmat->rend_bs; 2026 a->cstart_bs = 
oldmat->cstart_bs; 2027 a->cend_bs = oldmat->cend_bs; 2028 2029 /* hash table stuff */ 2030 a->ht = 0; 2031 a->hd = 0; 2032 a->ht_size = 0; 2033 a->ht_flag = oldmat->ht_flag; 2034 a->ht_fact = oldmat->ht_fact; 2035 a->ht_total_ct = 0; 2036 a->ht_insert_ct = 0; 2037 2038 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2039 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2040 a->cowners = a->rowners + a->size + 2; 2041 a->rowners_bs = a->cowners + a->size + 2; 2042 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2043 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2044 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2045 if (oldmat->colmap) { 2046 #if defined (PETSC_USE_CTABLE) 2047 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2048 #else 2049 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2050 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2051 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2052 #endif 2053 } else a->colmap = 0; 2054 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2055 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2056 PetscLogObjectMemory(mat,len*sizeof(int)); 2057 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2058 } else a->garray = 0; 2059 2060 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2061 PetscLogObjectParent(mat,a->lvec); 2062 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2063 2064 PetscLogObjectParent(mat,a->Mvctx); 2065 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2066 PetscLogObjectParent(mat,a->A); 2067 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2068 PetscLogObjectParent(mat,a->B); 2069 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2070 
*newmat = mat; 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #include "petscsys.h" 2075 2076 EXTERN_C_BEGIN 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatLoad_MPISBAIJ" 2079 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2080 { 2081 Mat A; 2082 int i,nz,ierr,j,rstart,rend,fd; 2083 PetscScalar *vals,*buf; 2084 MPI_Comm comm = ((PetscObject)viewer)->comm; 2085 MPI_Status status; 2086 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2087 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2088 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2089 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2090 int dcount,kmax,k,nzcount,tmp; 2091 2092 PetscFunctionBegin; 2093 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2094 2095 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2096 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2097 if (!rank) { 2098 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2099 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2100 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2101 if (header[3] < 0) { 2102 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2103 } 2104 } 2105 2106 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2107 M = header[1]; N = header[2]; 2108 2109 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2110 2111 /* 2112 This code adds extra rows to make sure the number of rows is 2113 divisible by the blocksize 2114 */ 2115 Mbs = M/bs; 2116 extra_rows = bs - M + bs*(Mbs); 2117 if (extra_rows == bs) extra_rows = 0; 2118 else Mbs++; 2119 if (extra_rows &&!rank) { 2120 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2121 } 2122 2123 /* determine ownership of all rows */ 2124 mbs = Mbs/size + ((Mbs % size) > rank); 2125 m = mbs*bs; 
2126 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2127 browners = rowners + size + 1; 2128 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2129 rowners[0] = 0; 2130 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2131 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2132 rstart = rowners[rank]; 2133 rend = rowners[rank+1]; 2134 2135 /* distribute row lengths to all processors */ 2136 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2137 if (!rank) { 2138 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2139 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2140 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2141 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2142 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2143 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2144 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2145 } else { 2146 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2147 } 2148 2149 if (!rank) { /* procs[0] */ 2150 /* calculate the number of nonzeros on each processor */ 2151 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2152 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2153 for (i=0; i<size; i++) { 2154 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2155 procsnz[i] += rowlengths[j]; 2156 } 2157 } 2158 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2159 2160 /* determine max buffer needed and allocate it */ 2161 maxnz = 0; 2162 for (i=0; i<size; i++) { 2163 maxnz = PetscMax(maxnz,procsnz[i]); 2164 } 2165 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2166 2167 /* read in my part of the matrix column indices */ 2168 nz = procsnz[0]; 2169 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2170 mycols = ibuf; 2171 if (size == 1) nz -= 
extra_rows; 2172 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2173 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2174 2175 /* read in every ones (except the last) and ship off */ 2176 for (i=1; i<size-1; i++) { 2177 nz = procsnz[i]; 2178 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2179 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2180 } 2181 /* read in the stuff for the last proc */ 2182 if (size != 1) { 2183 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2184 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2185 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2186 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2187 } 2188 ierr = PetscFree(cols);CHKERRQ(ierr); 2189 } else { /* procs[i], i>0 */ 2190 /* determine buffer space needed for message */ 2191 nz = 0; 2192 for (i=0; i<m; i++) { 2193 nz += locrowlens[i]; 2194 } 2195 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2196 mycols = ibuf; 2197 /* receive message of column indices*/ 2198 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2199 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2200 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2201 } 2202 2203 /* loop over local rows, determining number of off diagonal entries */ 2204 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2205 odlens = dlens + (rend-rstart); 2206 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2207 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2208 masked1 = mask + Mbs; 2209 masked2 = masked1 + Mbs; 2210 rowcount = 0; nzcount = 0; 2211 for (i=0; i<mbs; i++) { 2212 dcount = 0; 2213 odcount = 0; 2214 for (j=0; j<bs; j++) { 2215 kmax = locrowlens[rowcount]; 2216 for (k=0; k<kmax; k++) { 2217 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2218 if (!mask[tmp]) { 2219 mask[tmp] = 1; 2220 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2221 else masked1[dcount++] = tmp; /* entry in diag portion */ 2222 } 2223 } 2224 rowcount++; 2225 } 2226 2227 dlens[i] = dcount; /* d_nzz[i] */ 2228 odlens[i] = odcount; /* o_nzz[i] */ 2229 2230 /* zero out the mask elements we set */ 2231 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2232 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2233 } 2234 2235 /* create our matrix */ 2236 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2237 CHKERRQ(ierr); 2238 A = *newmat; 2239 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2240 2241 if (!rank) { 2242 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2243 /* read in my part of the matrix numerical values */ 2244 nz = procsnz[0]; 2245 vals = buf; 2246 mycols = ibuf; 2247 if (size == 1) nz -= extra_rows; 2248 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2249 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2250 2251 /* insert into matrix */ 2252 jj = rstart*bs; 2253 for (i=0; i<m; i++) { 2254 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2255 mycols += locrowlens[i]; 2256 vals += locrowlens[i]; 2257 jj++; 2258 } 2259 2260 /* read in other processors (except the last one) and ship out */ 2261 for (i=1; i<size-1; i++) { 2262 nz = procsnz[i]; 2263 vals = buf; 2264 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2265 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2266 } 2267 /* the last proc */ 2268 if (size != 1){ 2269 nz = procsnz[i] - extra_rows; 2270 vals = buf; 2271 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2272 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2273 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2274 } 2275 ierr = 
PetscFree(procsnz);CHKERRQ(ierr); 2276 2277 } else { 2278 /* receive numeric values */ 2279 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2280 2281 /* receive message of values*/ 2282 vals = buf; 2283 mycols = ibuf; 2284 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2285 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2286 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2287 2288 /* insert into matrix */ 2289 jj = rstart*bs; 2290 for (i=0; i<m; i++) { 2291 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2292 mycols += locrowlens[i]; 2293 vals += locrowlens[i]; 2294 jj++; 2295 } 2296 } 2297 2298 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2299 ierr = PetscFree(buf);CHKERRQ(ierr); 2300 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2301 ierr = PetscFree(rowners);CHKERRQ(ierr); 2302 ierr = PetscFree(dlens);CHKERRQ(ierr); 2303 ierr = PetscFree(mask);CHKERRQ(ierr); 2304 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 EXTERN_C_END 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2312 /*@ 2313 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2314 2315 Input Parameters: 2316 . mat - the matrix 2317 . 
fact - factor 2318 2319 Collective on Mat 2320 2321 Level: advanced 2322 2323 Notes: 2324 This can also be set by the command line option: -mat_use_hash_table fact 2325 2326 .keywords: matrix, hashtable, factor, HT 2327 2328 .seealso: MatSetOption() 2329 @*/ 2330 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2331 { 2332 PetscFunctionBegin; 2333 SETERRQ(1,"Function not yet written for SBAIJ format"); 2334 /* PetscFunctionReturn(0); */ 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2339 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2340 { 2341 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2342 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2343 PetscReal atmp; 2344 PetscReal *work,*svalues,*rvalues; 2345 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2346 int rank,size,*rowners_bs,dest,count,source; 2347 PetscScalar *va; 2348 MatScalar *ba; 2349 MPI_Status stat; 2350 2351 PetscFunctionBegin; 2352 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2353 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2354 2355 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 2356 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 2357 2358 bs = a->bs; 2359 mbs = a->mbs; 2360 Mbs = a->Mbs; 2361 ba = b->a; 2362 bi = b->i; 2363 bj = b->j; 2364 /* 2365 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2366 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2367 */ 2368 2369 /* find ownerships */ 2370 rowners_bs = a->rowners_bs; 2371 /* 2372 if (!rank){ 2373 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2374 } 2375 */ 2376 2377 /* each proc creates an array to be distributed */ 2378 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2379 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2380 2381 /* row_max for B */ 2382 if (rank != size-1){ 2383 for (i=0; i<mbs; i++) { 2384 ncols = bi[1] - bi[0]; bi++; 2385 brow = 
bs*i; 2386 for (j=0; j<ncols; j++){ 2387 bcol = bs*(*bj); 2388 for (kcol=0; kcol<bs; kcol++){ 2389 col = bcol + kcol; /* local col index */ 2390 col += rowners_bs[rank+1]; /* global col index */ 2391 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2392 for (krow=0; krow<bs; krow++){ 2393 atmp = PetscAbsScalar(*ba); ba++; 2394 row = brow + krow; /* local row index */ 2395 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2396 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2397 if (work[col] < atmp) work[col] = atmp; 2398 } 2399 } 2400 bj++; 2401 } 2402 } 2403 /* 2404 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2405 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2406 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2407 */ 2408 2409 /* send values to its owners */ 2410 for (dest=rank+1; dest<size; dest++){ 2411 svalues = work + rowners_bs[dest]; 2412 count = rowners_bs[dest+1]-rowners_bs[dest]; 2413 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr); 2414 /* 2415 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2416 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2417 */ 2418 } 2419 } 2420 2421 /* receive values */ 2422 if (rank){ 2423 rvalues = work; 2424 count = rowners_bs[rank+1]-rowners_bs[rank]; 2425 for (source=0; source<rank; source++){ 2426 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr); 2427 /* process values */ 2428 for (i=0; i<count; i++){ 2429 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2430 } 2431 /* 2432 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2433 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2434 */ 2435 } 2436 } 2437 2438 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2439 ierr = 
PetscFree(work);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatRelax_MPISBAIJ" 2445 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2446 { 2447 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2448 int ierr,mbs=mat->mbs,bs=mat->bs; 2449 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2450 Vec bb1; 2451 2452 PetscFunctionBegin; 2453 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2454 if (bs > 1) 2455 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2456 2457 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2458 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2459 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2460 its--; 2461 } 2462 2463 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2464 while (its--){ 2465 2466 /* lower triangular part: slvec0b = - B^T*xx */ 2467 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2468 2469 /* copy xx into slvec0a */ 2470 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2471 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2472 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2473 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2474 2475 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2476 2477 /* copy bb into slvec1a */ 2478 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2479 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2480 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2481 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2482 2483 /* set slvec1b = 0 */ 2484 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2485 2486 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2488 ierr = 
VecRestoreArray(bb,&b);CHKERRQ(ierr); 2489 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2490 2491 /* upper triangular part: bb1 = bb1 - B*x */ 2492 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2493 2494 /* local diagonal sweep */ 2495 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2496 } 2497 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2498 } else { 2499 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2506 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2507 { 2508 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2509 int ierr; 2510 PetscScalar mone=-1.0; 2511 Vec lvec1,bb1; 2512 2513 PetscFunctionBegin; 2514 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2515 if (mat->bs > 1) 2516 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2517 2518 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2519 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2520 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2521 its--; 2522 } 2523 2524 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2525 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2526 while (its--){ 2527 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2528 2529 /* lower diagonal part: bb1 = bb - B^T*xx */ 2530 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2531 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2532 2533 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2534 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 
2535 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2536 2537 /* upper diagonal part: bb1 = bb1 - B*x */ 2538 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2539 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2540 2541 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2542 2543 /* diagonal sweep */ 2544 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2545 } 2546 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2547 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2548 } else { 2549 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2550 } 2551 PetscFunctionReturn(0); 2552 } 2553 2554