1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 /* 76 Local utility routine that creates a mapping from the global column 77 number to the local number in the off-diagonal part of the local 78 storage of the matrix. This is done in a non scable way since the 79 length of colmap equals the global matrix length. 80 */ 81 #undef __FUNCT__ 82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 83 static int CreateColmap_MPISBAIJ_Private(Mat mat) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"Function not yet written for SBAIJ format"); 87 /* PetscFunctionReturn(0); */ 88 } 89 90 #define CHUNKSIZE 10 91 92 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 93 { \ 94 \ 95 brow = row/bs; \ 96 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 97 rmax = aimax[brow]; nrow = ailen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto a_noinsert; \ 113 } \ 114 } \ 115 if (a->nonew == 1) goto a_noinsert; \ 116 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 117 if (nrow >= rmax) { \ 118 /* there is no extra room in row, therefore enlarge */ \ 119 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 120 MatScalar *new_a; \ 121 \ 122 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 123 \ 124 /* malloc new storage space */ \ 125 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 126 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 127 new_j = (int*)(new_a + bs2*new_nz); \ 128 new_i = new_j + new_nz; \ 129 \ 130 /* copy over old data into new slots */ \ 131 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 132 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 133 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 134 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 135 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 137 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 138 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 139 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 140 /* free up old matrix storage */ \ 141 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 142 if (!a->singlemalloc) { \ 143 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 144 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 145 } \ 146 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 147 a->singlemalloc = PETSC_TRUE; \ 148 \ 149 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 150 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 151 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 152 a->s_maxnz += bs2*CHUNKSIZE; \ 153 a->reallocs++; \ 154 a->s_nz++; \ 155 } \ 156 N = nrow++ - 1; \ 157 /* shift up all the later entries in this row */ \ 158 for (ii=N; ii>=_i; ii--) { \ 159 rp[ii+1] = rp[ii]; \ 160 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 161 } \ 162 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 163 rp[_i] = bcol; \ 164 ap[bs2*_i + bs*cidx + ridx] = value; \ 165 a_noinsert:; \ 166 ailen[brow] = nrow; \ 167 } 168 #ifndef MatSetValues_SeqBAIJ_B_Private 169 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 170 { \ 171 brow = row/bs; \ 172 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173 rmax = bimax[brow]; nrow = bilen[brow]; \ 174 bcol = col/bs; \ 175 ridx = row % bs; cidx = col % bs; \ 176 low = 0; high = nrow; \ 177 while (high-low > 3) { \ 178 t = (low+high)/2; \ 179 if (rp[t] > bcol) high = t; \ 180 else low = t; \ 181 } \ 182 for (_i=low; _i<high; _i++) { \ 183 if (rp[_i] > bcol) break; \ 184 if (rp[_i] == bcol) { \ 185 bap = ap + bs2*_i + bs*cidx + ridx; \ 186 if (addv == ADD_VALUES) *bap += value; \ 187 else *bap = value; \ 188 goto b_noinsert; \ 189 } \ 190 } \ 191 if (b->nonew == 1) goto b_noinsert; \ 192 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 193 if (nrow >= rmax) { \ 194 /* there is no extra room in row, therefore enlarge */ \ 195 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 196 MatScalar *new_a; \ 197 \ 198 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 199 \ 200 /* malloc new storage space */ \ 201 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 202 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 203 new_j = (int*)(new_a + bs2*new_nz); \ 204 new_i = new_j + new_nz; \ 205 \ 206 /* copy over old data into new slots */ \ 207 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 208 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 209 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 210 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 211 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 213 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 214 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 215 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 216 /* free up old matrix storage */ \ 217 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 218 if (!b->singlemalloc) { \ 219 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 220 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 221 } \ 222 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 223 b->singlemalloc = PETSC_TRUE; \ 224 \ 225 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 226 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 227 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 228 b->maxnz += bs2*CHUNKSIZE; \ 229 b->reallocs++; \ 230 b->nz++; \ 231 } \ 232 N = nrow++ - 1; \ 233 /* shift up all the later entries in this row */ \ 234 for (ii=N; ii>=_i; ii--) { \ 235 rp[ii+1] = rp[ii]; \ 236 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 237 } \ 238 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 239 rp[_i] = bcol; \ 240 ap[bs2*_i + bs*cidx + ridx] = value; \ 241 b_noinsert:; \ 242 bilen[brow] = nrow; \ 243 } 244 #endif 245 246 #if defined(PETSC_USE_MAT_SINGLE) 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatSetValues_MPISBAIJ" 249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 250 { 251 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 252 int ierr,i,N = m*n; 253 MatScalar *vsingle; 254 255 PetscFunctionBegin; 256 if (N > b->setvalueslen) { 257 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 258 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 259 b->setvalueslen = N; 260 } 261 vsingle = b->setvaluescopy; 262 263 for (i=0; i<N; i++) { 264 vsingle[i] = v[i]; 265 } 266 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 273 { 274 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 275 int ierr,i,N = m*n*b->bs2; 276 MatScalar *vsingle; 277 278 PetscFunctionBegin; 279 if (N > b->setvalueslen) { 280 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 281 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 282 b->setvalueslen = N; 283 } 284 vsingle = b->setvaluescopy; 285 for (i=0; i<N; i++) { 286 vsingle[i] = v[i]; 287 } 288 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 295 { 296 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 297 int ierr,i,N = m*n; 298 MatScalar *vsingle; 299 300 PetscFunctionBegin; 301 SETERRQ(1,"Function not yet written for SBAIJ format"); 302 /* PetscFunctionReturn(0); */ 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 308 { 309 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 310 int ierr,i,N = m*n*b->bs2; 311 MatScalar *vsingle; 312 313 PetscFunctionBegin; 314 SETERRQ(1,"Function not yet written for SBAIJ format"); 315 /* PetscFunctionReturn(0); */ 316 } 317 #endif 318 319 /* Only add/insert a(i,j) with i<=j (blocks). 320 Any a(i,j) with i>j input by user is ingored. 321 */ 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSetValues_MPIBAIJ" 324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 325 { 326 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 327 MatScalar value; 328 PetscTruth roworiented = baij->roworiented; 329 int ierr,i,j,row,col; 330 int rstart_orig=baij->rstart_bs; 331 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 332 int cend_orig=baij->cend_bs,bs=baij->bs; 333 334 /* Some Variables required in the macro */ 335 Mat A = baij->A; 336 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 337 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 338 MatScalar *aa=a->a; 339 340 Mat B = baij->B; 341 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 342 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 343 MatScalar *ba=b->a; 344 345 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 346 int low,high,t,ridx,cidx,bs2=a->bs2; 347 MatScalar *ap,*bap; 348 349 /* for stash */ 350 int n_loc, *in_loc=0; 351 MatScalar *v_loc=0; 352 353 PetscFunctionBegin; 354 355 if(!baij->donotstash){ 356 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 357 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 358 } 359 360 for (i=0; i<m; i++) { 361 if (im[i] < 0) continue; 362 #if defined(PETSC_USE_BOPT_g) 363 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 364 #endif 365 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 366 row = im[i] - rstart_orig; /* local row index */ 367 for (j=0; j<n; j++) { 368 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 369 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 370 col = in[j] - cstart_orig; /* local col index */ 371 brow = row/bs; bcol = col/bs; 372 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 373 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 374 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 375 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 376 } else if (in[j] < 0) continue; 377 #if defined(PETSC_USE_BOPT_g) 378 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 379 #endif 380 else { /* off-diag entry (B) */ 381 if (mat->was_assembled) { 382 if (!baij->colmap) { 383 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined (PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 387 col = col - 1; 388 #else 389 col = baij->colmap[in[j]/bs] - 1; 390 #endif 391 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 392 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 393 col = in[j]; 394 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 395 B = baij->B; 396 b = (Mat_SeqBAIJ*)(B)->data; 397 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 398 ba=b->a; 399 } else col += in[j]%bs; 400 } else col = in[j]; 401 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 402 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 403 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 404 } 405 } 406 } else { /* off processor entry */ 407 if (!baij->donotstash) { 408 n_loc = 0; 409 for (j=0; j<n; j++){ 410 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 411 in_loc[n_loc] = in[j]; 412 if (roworiented) { 413 v_loc[n_loc] = v[i*n+j]; 414 } else { 415 v_loc[n_loc] = v[j*m+i]; 416 } 417 n_loc++; 418 } 419 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 420 } 421 } 422 } 423 424 if(!baij->donotstash){ 425 ierr = PetscFree(in_loc);CHKERRQ(ierr); 426 ierr = PetscFree(v_loc);CHKERRQ(ierr); 427 } 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 434 { 435 PetscFunctionBegin; 436 SETERRQ(1,"Function not yet written for SBAIJ format"); 437 /* PetscFunctionReturn(0); */ 438 } 439 440 #define HASH_KEY 0.6180339887 441 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 442 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 443 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 446 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 447 { 448 PetscFunctionBegin; 449 SETERRQ(1,"Function not yet written for SBAIJ format"); 450 /* PetscFunctionReturn(0); */ 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 455 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 456 { 457 PetscFunctionBegin; 458 SETERRQ(1,"Function not yet written for SBAIJ format"); 459 /* PetscFunctionReturn(0); */ 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatGetValues_MPISBAIJ" 464 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 465 { 466 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 467 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 468 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 469 470 PetscFunctionBegin; 471 for (i=0; i<m; i++) { 472 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 473 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 474 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 475 row = idxm[i] - bsrstart; 476 for (j=0; j<n; j++) { 477 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 478 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 479 if (idxn[j] >= bscstart && idxn[j] < bscend){ 480 col = idxn[j] - bscstart; 481 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 482 } else { 483 if (!baij->colmap) { 484 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 485 } 486 #if defined (PETSC_USE_CTABLE) 487 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 488 data --; 489 #else 490 data = baij->colmap[idxn[j]/bs]-1; 491 #endif 492 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 493 else { 494 col = data + idxn[j]%bs; 495 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 496 } 497 } 498 } 499 } else { 500 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 501 } 502 } 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatNorm_MPISBAIJ" 508 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 509 { 510 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 511 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 512 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 513 int ierr; 514 PetscReal sum[2],*lnorm2; 515 516 PetscFunctionBegin; 517 if (baij->size == 1) { 518 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 519 } else { 520 if (type == NORM_FROBENIUS) { 521 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 522 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 523 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 524 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 525 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 526 /* 527 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 528 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 529 */ 530 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 531 /* 532 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 533 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 534 535 *norm = sqrt(sum[0] + 2*sum[1]); 536 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 537 } else { 538 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 /* 545 Creates the hash table, and sets the table 546 This table is created only once. 547 If new entried need to be added to the matrix 548 then the hash table has to be destroyed and 549 recreated. 550 */ 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 553 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 554 { 555 PetscFunctionBegin; 556 SETERRQ(1,"Function not yet written for SBAIJ format"); 557 /* PetscFunctionReturn(0); */ 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 562 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 563 { 564 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 565 int ierr,nstash,reallocs; 566 InsertMode addv; 567 568 PetscFunctionBegin; 569 if (baij->donotstash) { 570 PetscFunctionReturn(0); 571 } 572 573 /* make sure all processors are either in INSERTMODE or ADDMODE */ 574 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 575 if (addv == (ADD_VALUES|INSERT_VALUES)) { 576 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 577 } 578 mat->insertmode = addv; /* in case this processor had no cache */ 579 580 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 581 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 582 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 583 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 584 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 585 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 591 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 592 { 593 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 594 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 595 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 596 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 597 int *row,*col,other_disassembled; 598 PetscTruth r1,r2,r3; 599 MatScalar *val; 600 InsertMode addv = mat->insertmode; 601 602 PetscFunctionBegin; 603 604 if (!baij->donotstash) { 605 while (1) { 606 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 607 /* 608 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 609 PetscSynchronizedFlush(PETSC_COMM_WORLD); 610 */ 611 if (!flg) break; 612 613 for (i=0; i<n;) { 614 /* Now identify the consecutive vals belonging to the same row */ 615 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 616 if (j < n) ncols = j-i; 617 else ncols = n-i; 618 /* Now assemble all these values with a single function call */ 619 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 620 i = j; 621 } 622 } 623 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 624 /* Now process the block-stash. Since the values are stashed column-oriented, 625 set the roworiented flag to column oriented, and after MatSetValues() 626 restore the original flags */ 627 r1 = baij->roworiented; 628 r2 = a->roworiented; 629 r3 = b->roworiented; 630 baij->roworiented = PETSC_FALSE; 631 a->roworiented = PETSC_FALSE; 632 b->roworiented = PETSC_FALSE; 633 while (1) { 634 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 635 if (!flg) break; 636 637 for (i=0; i<n;) { 638 /* Now identify the consecutive vals belonging to the same row */ 639 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 640 if (j < n) ncols = j-i; 641 else ncols = n-i; 642 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 643 i = j; 644 } 645 } 646 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 647 baij->roworiented = r1; 648 a->roworiented = r2; 649 b->roworiented = r3; 650 } 651 652 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 653 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 654 655 /* determine if any processor has disassembled, if so we must 656 also disassemble ourselfs, in order that we may reassemble. */ 657 /* 658 if nonzero structure of submatrix B cannot change then we know that 659 no processor disassembled thus we can skip this stuff 660 */ 661 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 662 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 663 if (mat->was_assembled && !other_disassembled) { 664 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 665 } 666 } 667 668 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 669 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 670 } 671 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 672 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 673 674 #if defined(PETSC_USE_BOPT_g) 675 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 676 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 677 baij->ht_total_ct = 0; 678 baij->ht_insert_ct = 0; 679 } 680 #endif 681 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 682 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 683 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 684 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 685 } 686 687 if (baij->rowvalues) { 688 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 689 baij->rowvalues = 0; 690 } 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 696 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 697 { 698 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 699 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 700 PetscTruth isascii,isdraw; 701 PetscViewer sviewer; 702 PetscViewerFormat format; 703 704 PetscFunctionBegin; 705 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 706 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 707 if (isascii) { 708 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 709 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 710 MatInfo info; 711 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 712 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 713 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 714 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 715 baij->bs,(int)info.memory);CHKERRQ(ierr); 716 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 717 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 718 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 719 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 720 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 721 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } else if (format == PETSC_VIEWER_ASCII_INFO) { 724 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 } 728 729 if (isdraw) { 730 PetscDraw draw; 731 PetscTruth isnull; 732 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 733 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 734 } 735 736 if (size == 1) { 737 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 738 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 739 } else { 740 /* assemble the entire matrix onto first processor. */ 741 Mat A; 742 Mat_SeqSBAIJ *Aloc; 743 Mat_SeqBAIJ *Bloc; 744 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 745 MatScalar *a; 746 747 if (!rank) { 748 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 749 } else { 750 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 751 } 752 PetscLogObjectParent(mat,A); 753 754 /* copy over the A part */ 755 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 756 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 757 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 758 759 for (i=0; i<mbs; i++) { 760 rvals[0] = bs*(baij->rstart + i); 761 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 762 for (j=ai[i]; j<ai[i+1]; j++) { 763 col = (baij->cstart+aj[j])*bs; 764 for (k=0; k<bs; k++) { 765 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 766 col++; a += bs; 767 } 768 } 769 } 770 /* copy over the B part */ 771 Bloc = (Mat_SeqBAIJ*)baij->B->data; 772 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 773 for (i=0; i<mbs; i++) { 774 rvals[0] = bs*(baij->rstart + i); 775 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 776 for (j=ai[i]; j<ai[i+1]; j++) { 777 col = baij->garray[aj[j]]*bs; 778 for (k=0; k<bs; k++) { 779 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 780 col++; a += bs; 781 } 782 } 783 } 784 ierr = PetscFree(rvals);CHKERRQ(ierr); 785 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 786 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 787 /* 788 Everyone has to call to draw the matrix since the graphics waits are 789 synchronized across all processors that share the PetscDraw object 790 */ 791 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 792 if (!rank) { 793 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 794 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 795 } 796 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 797 ierr = MatDestroy(A);CHKERRQ(ierr); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatView_MPISBAIJ" 804 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 805 { 806 int ierr; 807 PetscTruth isascii,isdraw,issocket,isbinary; 808 809 PetscFunctionBegin; 810 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 811 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 812 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 813 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 814 if (isascii || isdraw || issocket || isbinary) { 815 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 816 } else { 817 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 818 } 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatDestroy_MPISBAIJ" 824 int MatDestroy_MPISBAIJ(Mat mat) 825 { 826 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 827 int ierr; 828 829 PetscFunctionBegin; 830 #if defined(PETSC_USE_LOG) 831 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 832 #endif 833 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 834 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 835 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 836 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 837 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 838 #if defined (PETSC_USE_CTABLE) 839 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 840 #else 841 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 842 #endif 843 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 844 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 845 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 846 if (baij->slvec0) { 847 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 848 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 849 } 850 if (baij->slvec1) { 851 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 852 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 853 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 854 } 855 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 856 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 857 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 858 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 859 #if defined(PETSC_USE_MAT_SINGLE) 860 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 861 #endif 862 ierr = PetscFree(baij);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 868 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 869 { 870 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 871 int ierr,nt; 872 873 PetscFunctionBegin; 874 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 875 if (nt != A->n) { 876 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 877 } 878 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 879 if (nt != A->m) { 880 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 881 } 882 883 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 884 /* do diagonal part */ 885 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 886 /* do supperdiagonal part */ 887 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 888 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 889 /* do subdiagonal part */ 890 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 891 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 892 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 893 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 899 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 900 { 901 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 902 int ierr; 903 904 PetscFunctionBegin; 905 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 906 /* do diagonal part */ 907 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 908 /* do supperdiagonal part */ 909 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 910 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 911 912 /* do subdiagonal part */ 913 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 914 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 915 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 916 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 922 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 923 { 924 PetscFunctionBegin; 925 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 926 /* PetscFunctionReturn(0); */ 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 931 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 932 { 933 PetscFunctionBegin; 934 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 935 /* PetscFunctionReturn(0); */ 936 } 937 938 /* 939 This only works correctly for square matrices where the subblock A->A is the 940 diagonal block 941 */ 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 944 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 945 { 946 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 947 int ierr; 948 949 PetscFunctionBegin; 950 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 951 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatScale_MPISBAIJ" 957 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 958 { 959 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 960 int ierr; 961 962 PetscFunctionBegin; 963 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 964 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatGetRow_MPISBAIJ" 970 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 971 { 972 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 973 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 974 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 975 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 976 int *cmap,*idx_p,cstart = mat->cstart; 977 978 PetscFunctionBegin; 979 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 980 mat->getrowactive = PETSC_TRUE; 981 982 if (!mat->rowvalues && (idx || v)) { 983 /* 984 allocate enough space to hold information from the longest row. 985 */ 986 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 987 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 988 int max = 1,mbs = mat->mbs,tmp; 989 for (i=0; i<mbs; i++) { 990 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 991 if (max < tmp) { max = tmp; } 992 } 993 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 994 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 995 } 996 997 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 998 lrow = row - brstart; /* local row index */ 999 1000 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1001 if (!v) {pvA = 0; pvB = 0;} 1002 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1003 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1004 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1005 nztot = nzA + nzB; 1006 1007 cmap = mat->garray; 1008 if (v || idx) { 1009 if (nztot) { 1010 /* Sort by increasing column numbers, assuming A and B already sorted */ 1011 int imark = -1; 1012 if (v) { 1013 *v = v_p = mat->rowvalues; 1014 for (i=0; i<nzB; i++) { 1015 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1016 else break; 1017 } 1018 imark = i; 1019 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1020 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1021 } 1022 if (idx) { 1023 *idx = idx_p = mat->rowindices; 1024 if (imark > -1) { 1025 for (i=0; i<imark; i++) { 1026 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1027 } 1028 } else { 1029 for (i=0; i<nzB; i++) { 1030 if (cmap[cworkB[i]/bs] < cstart) 1031 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1032 else break; 1033 } 1034 imark = i; 1035 } 1036 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1037 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1038 } 1039 } else { 1040 if (idx) *idx = 0; 1041 if (v) *v = 0; 1042 } 1043 } 1044 *nz = nztot; 1045 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1046 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1052 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1053 { 1054 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1055 1056 PetscFunctionBegin; 1057 if (baij->getrowactive == PETSC_FALSE) { 1058 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1059 } 1060 baij->getrowactive = PETSC_FALSE; 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1066 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1067 { 1068 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1069 1070 PetscFunctionBegin; 1071 *bs = baij->bs; 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1077 int MatZeroEntries_MPISBAIJ(Mat A) 1078 { 1079 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1080 int ierr; 1081 1082 PetscFunctionBegin; 1083 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1084 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1090 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1091 { 1092 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1093 Mat A = a->A,B = a->B; 1094 int ierr; 1095 PetscReal isend[5],irecv[5]; 1096 1097 PetscFunctionBegin; 1098 info->block_size = (PetscReal)a->bs; 1099 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1100 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1101 isend[3] = info->memory; isend[4] = info->mallocs; 1102 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1103 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1104 isend[3] += info->memory; isend[4] += info->mallocs; 1105 if (flag == MAT_LOCAL) { 1106 info->nz_used = isend[0]; 1107 info->nz_allocated = isend[1]; 1108 info->nz_unneeded = isend[2]; 1109 info->memory = isend[3]; 1110 info->mallocs = isend[4]; 1111 } else if (flag == MAT_GLOBAL_MAX) { 1112 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1113 info->nz_used = irecv[0]; 1114 info->nz_allocated = irecv[1]; 1115 info->nz_unneeded = irecv[2]; 1116 info->memory = irecv[3]; 1117 info->mallocs = irecv[4]; 1118 } else if (flag == MAT_GLOBAL_SUM) { 1119 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1120 info->nz_used = irecv[0]; 1121 info->nz_allocated = irecv[1]; 1122 info->nz_unneeded = irecv[2]; 1123 info->memory = irecv[3]; 1124 info->mallocs = irecv[4]; 1125 } else { 1126 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1127 } 1128 info->rows_global = (PetscReal)A->M; 1129 info->columns_global = (PetscReal)A->N; 1130 info->rows_local = (PetscReal)A->m; 1131 info->columns_local = (PetscReal)A->N; 1132 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1133 info->fill_ratio_needed = 0; 1134 info->factor_mallocs = 0; 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1140 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1141 { 1142 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1143 int ierr; 1144 1145 PetscFunctionBegin; 1146 switch (op) { 1147 case MAT_NO_NEW_NONZERO_LOCATIONS: 1148 case MAT_YES_NEW_NONZERO_LOCATIONS: 1149 case MAT_COLUMNS_UNSORTED: 1150 case MAT_COLUMNS_SORTED: 1151 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1152 case MAT_KEEP_ZEROED_ROWS: 1153 case MAT_NEW_NONZERO_LOCATION_ERR: 1154 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1155 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1156 break; 1157 case MAT_ROW_ORIENTED: 1158 a->roworiented = PETSC_TRUE; 1159 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1160 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1161 break; 1162 case MAT_ROWS_SORTED: 1163 case MAT_ROWS_UNSORTED: 1164 case MAT_YES_NEW_DIAGONALS: 1165 case MAT_USE_SINGLE_PRECISION_SOLVES: 1166 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1167 break; 1168 case MAT_COLUMN_ORIENTED: 1169 a->roworiented = PETSC_FALSE; 1170 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1171 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1172 break; 1173 case MAT_IGNORE_OFF_PROC_ENTRIES: 1174 a->donotstash = PETSC_TRUE; 1175 break; 1176 case MAT_NO_NEW_DIAGONALS: 1177 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1178 case MAT_USE_HASH_TABLE: 1179 a->ht_flag = PETSC_TRUE; 1180 break; 1181 default: 1182 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatTranspose_MPISBAIJ(" 1189 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1190 { 1191 PetscFunctionBegin; 1192 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 1193 /* PetscFunctionReturn(0); */ 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1198 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1199 { 1200 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1201 Mat a = baij->A,b = baij->B; 1202 int ierr,s1,s2,s3; 1203 1204 PetscFunctionBegin; 1205 if (ll != rr) { 1206 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1207 } 1208 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1209 if (rr) { 1210 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1211 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1212 /* Overlap communication with computation. */ 1213 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1214 /*} if (ll) { */ 1215 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1216 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1217 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1218 /* } */ 1219 /* scale the diagonal block */ 1220 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1221 1222 /* if (rr) { */ 1223 /* Do a scatter end and then right scale the off-diagonal block */ 1224 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1225 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1226 } 1227 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1233 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1234 { 1235 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1236 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1237 int *procs,*nprocs,j,idx,nsends,*work,row; 1238 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1239 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1240 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1241 MPI_Comm comm = A->comm; 1242 MPI_Request *send_waits,*recv_waits; 1243 MPI_Status recv_status,*send_status; 1244 IS istmp; 1245 PetscTruth found; 1246 1247 PetscFunctionBegin; 1248 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1249 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1250 1251 /* first count number of contributors to each processor */ 1252 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1253 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1254 procs = nprocs + size; 1255 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1256 for (i=0; i<N; i++) { 1257 idx = rows[i]; 1258 found = PETSC_FALSE; 1259 for (j=0; j<size; j++) { 1260 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1261 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1262 } 1263 } 1264 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1265 } 1266 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1267 1268 /* inform other processors of number of messages and max length*/ 1269 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1270 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1271 nmax = work[rank]; 1272 nrecvs = work[size+rank]; 1273 ierr = PetscFree(work);CHKERRQ(ierr); 1274 1275 /* post receives: */ 1276 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1277 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1278 for (i=0; i<nrecvs; i++) { 1279 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1280 } 1281 1282 /* do sends: 1283 1) starts[i] gives the starting index in svalues for stuff going to 1284 the ith processor 1285 */ 1286 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1287 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1288 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1289 starts[0] = 0; 1290 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1291 for (i=0; i<N; i++) { 1292 svalues[starts[owner[i]]++] = rows[i]; 1293 } 1294 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1295 1296 starts[0] = 0; 1297 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1298 count = 0; 1299 for (i=0; i<size; i++) { 1300 if (procs[i]) { 1301 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1302 } 1303 } 1304 ierr = PetscFree(starts);CHKERRQ(ierr); 1305 1306 base = owners[rank]*bs; 1307 1308 /* wait on receives */ 1309 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1310 source = lens + nrecvs; 1311 count = nrecvs; slen = 0; 1312 while (count) { 1313 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1314 /* unpack receives into our local space */ 1315 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1316 source[imdex] = recv_status.MPI_SOURCE; 1317 lens[imdex] = n; 1318 slen += n; 1319 count--; 1320 } 1321 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1322 1323 /* move the data into the send scatter */ 1324 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1325 count = 0; 1326 for (i=0; i<nrecvs; i++) { 1327 values = rvalues + i*nmax; 1328 for (j=0; j<lens[i]; j++) { 1329 lrows[count++] = values[j] - base; 1330 } 1331 } 1332 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1333 ierr = PetscFree(lens);CHKERRQ(ierr); 1334 ierr = PetscFree(owner);CHKERRQ(ierr); 1335 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1336 1337 /* actually zap the local rows */ 1338 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1339 PetscLogObjectParent(A,istmp); 1340 1341 /* 1342 Zero the required rows. If the "diagonal block" of the matrix 1343 is square and the user wishes to set the diagonal we use seperate 1344 code so that MatSetValues() is not called for each diagonal allocating 1345 new memory, thus calling lots of mallocs and slowing things down. 1346 1347 Contributed by: Mathew Knepley 1348 */ 1349 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1350 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1351 if (diag && (l->A->M == l->A->N)) { 1352 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1353 } else if (diag) { 1354 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1355 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1356 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1357 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1358 } 1359 for (i=0; i<slen; i++) { 1360 row = lrows[i] + rstart_bs; 1361 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1362 } 1363 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1364 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1365 } else { 1366 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1367 } 1368 1369 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1370 ierr = PetscFree(lrows);CHKERRQ(ierr); 1371 1372 /* wait on sends */ 1373 if (nsends) { 1374 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1375 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1376 ierr = PetscFree(send_status);CHKERRQ(ierr); 1377 } 1378 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1379 ierr = PetscFree(svalues);CHKERRQ(ierr); 1380 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1386 int MatPrintHelp_MPISBAIJ(Mat A) 1387 { 1388 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1389 MPI_Comm comm = A->comm; 1390 static int called = 0; 1391 int ierr; 1392 1393 PetscFunctionBegin; 1394 if (!a->rank) { 1395 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1396 } 1397 if (called) {PetscFunctionReturn(0);} else called = 1; 1398 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1399 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1405 int MatSetUnfactored_MPISBAIJ(Mat A) 1406 { 1407 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1408 int ierr; 1409 1410 PetscFunctionBegin; 1411 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatEqual_MPISBAIJ" 1419 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1420 { 1421 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1422 Mat a,b,c,d; 1423 PetscTruth flg; 1424 int ierr; 1425 1426 PetscFunctionBegin; 1427 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1428 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1429 a = matA->A; b = matA->B; 1430 c = matB->A; d = matB->B; 1431 1432 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1433 if (flg == PETSC_TRUE) { 1434 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1435 } 1436 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1442 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1443 { 1444 int ierr; 1445 1446 PetscFunctionBegin; 1447 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 /* -------------------------------------------------------------------*/ 1451 static struct _MatOps MatOps_Values = { 1452 MatSetValues_MPISBAIJ, 1453 MatGetRow_MPISBAIJ, 1454 MatRestoreRow_MPISBAIJ, 1455 MatMult_MPISBAIJ_2comm, 1456 MatMultAdd_MPISBAIJ, 1457 MatMultTranspose_MPISBAIJ, 1458 MatMultTransposeAdd_MPISBAIJ, 1459 0, 1460 0, 1461 0, 1462 0, 1463 0, 1464 0, 1465 MatRelax_MPISBAIJ, 1466 MatTranspose_MPISBAIJ, 1467 MatGetInfo_MPISBAIJ, 1468 MatEqual_MPISBAIJ, 1469 MatGetDiagonal_MPISBAIJ, 1470 MatDiagonalScale_MPISBAIJ, 1471 MatNorm_MPISBAIJ, 1472 MatAssemblyBegin_MPISBAIJ, 1473 MatAssemblyEnd_MPISBAIJ, 1474 0, 1475 MatSetOption_MPISBAIJ, 1476 MatZeroEntries_MPISBAIJ, 1477 MatZeroRows_MPISBAIJ, 1478 0, 1479 0, 1480 0, 1481 0, 1482 MatSetUpPreallocation_MPISBAIJ, 1483 0, 1484 0, 1485 0, 1486 0, 1487 MatDuplicate_MPISBAIJ, 1488 0, 1489 0, 1490 0, 1491 0, 1492 0, 1493 MatGetSubMatrices_MPISBAIJ, 1494 MatIncreaseOverlap_MPISBAIJ, 1495 MatGetValues_MPISBAIJ, 1496 0, 1497 MatPrintHelp_MPISBAIJ, 1498 MatScale_MPISBAIJ, 1499 0, 1500 0, 1501 0, 1502 MatGetBlockSize_MPISBAIJ, 1503 0, 1504 0, 1505 0, 1506 0, 1507 0, 1508 0, 1509 MatSetUnfactored_MPISBAIJ, 1510 0, 1511 MatSetValuesBlocked_MPISBAIJ, 1512 0, 1513 0, 1514 0, 1515 MatGetPetscMaps_Petsc, 1516 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 MatGetRowMax_MPISBAIJ}; 1523 1524 1525 EXTERN_C_BEGIN 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1528 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1529 { 1530 PetscFunctionBegin; 1531 *a = ((Mat_MPISBAIJ *)A->data)->A; 1532 *iscopy = PETSC_FALSE; 1533 PetscFunctionReturn(0); 1534 } 1535 EXTERN_C_END 1536 1537 EXTERN_C_BEGIN 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatCreate_MPISBAIJ" 1540 int MatCreate_MPISBAIJ(Mat B) 1541 { 1542 Mat_MPISBAIJ *b; 1543 int ierr; 1544 PetscTruth flg; 1545 1546 PetscFunctionBegin; 1547 1548 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1549 B->data = (void*)b; 1550 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1551 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1552 1553 B->ops->destroy = MatDestroy_MPISBAIJ; 1554 B->ops->view = MatView_MPISBAIJ; 1555 B->mapping = 0; 1556 B->factor = 0; 1557 B->assembled = PETSC_FALSE; 1558 1559 B->insertmode = NOT_SET_VALUES; 1560 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1561 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1562 1563 /* build local table of row and column ownerships */ 1564 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1565 b->cowners = b->rowners + b->size + 2; 1566 b->rowners_bs = b->cowners + b->size + 2; 1567 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1568 1569 /* build cache for off array entries formed */ 1570 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1571 b->donotstash = PETSC_FALSE; 1572 b->colmap = PETSC_NULL; 1573 b->garray = PETSC_NULL; 1574 b->roworiented = PETSC_TRUE; 1575 1576 #if defined(PETSC_USE_MAT_SINGLE) 1577 /* stuff for MatSetValues_XXX in single precision */ 1578 b->setvalueslen = 0; 1579 b->setvaluescopy = PETSC_NULL; 1580 #endif 1581 1582 /* stuff used in block assembly */ 1583 b->barray = 0; 1584 1585 /* stuff used for matrix vector multiply */ 1586 b->lvec = 0; 1587 b->Mvctx = 0; 1588 b->slvec0 = 0; 1589 b->slvec0b = 0; 1590 b->slvec1 = 0; 1591 b->slvec1a = 0; 1592 b->slvec1b = 0; 1593 b->sMvctx = 0; 1594 1595 /* stuff for MatGetRow() */ 1596 b->rowindices = 0; 1597 b->rowvalues = 0; 1598 b->getrowactive = PETSC_FALSE; 1599 1600 /* hash table stuff */ 1601 b->ht = 0; 1602 b->hd = 0; 1603 b->ht_size = 0; 1604 b->ht_flag = PETSC_FALSE; 1605 b->ht_fact = 0; 1606 b->ht_total_ct = 0; 1607 b->ht_insert_ct = 0; 1608 1609 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1610 if (flg) { 1611 PetscReal fact = 1.39; 1612 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1613 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1614 if (fact <= 1.0) fact = 1.39; 1615 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1616 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1617 } 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1619 "MatStoreValues_MPISBAIJ", 1620 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1622 "MatRetrieveValues_MPISBAIJ", 1623 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1625 "MatGetDiagonalBlock_MPISBAIJ", 1626 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1627 PetscFunctionReturn(0); 1628 } 1629 EXTERN_C_END 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1633 /*@C 1634 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1635 the user should preallocate the matrix storage by setting the parameters 1636 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1637 performance can be increased by more than a factor of 50. 1638 1639 Collective on Mat 1640 1641 Input Parameters: 1642 + A - the matrix 1643 . bs - size of blockk 1644 . d_nz - number of block nonzeros per block row in diagonal portion of local 1645 submatrix (same for all local rows) 1646 . d_nnz - array containing the number of block nonzeros in the various block rows 1647 of the in diagonal portion of the local (possibly different for each block 1648 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1649 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1650 submatrix (same for all local rows). 1651 - o_nnz - array containing the number of nonzeros in the various block rows of the 1652 off-diagonal portion of the local submatrix (possibly different for 1653 each block row) or PETSC_NULL. 1654 1655 1656 Options Database Keys: 1657 . -mat_no_unroll - uses code that does not unroll the loops in the 1658 block calculations (much slower) 1659 . -mat_block_size - size of the blocks to use 1660 1661 Notes: 1662 1663 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1664 than it must be used on all processors that share the object for that argument. 1665 1666 Storage Information: 1667 For a square global matrix we define each processor's diagonal portion 1668 to be its local rows and the corresponding columns (a square submatrix); 1669 each processor's off-diagonal portion encompasses the remainder of the 1670 local matrix (a rectangular submatrix). 1671 1672 The user can specify preallocated storage for the diagonal part of 1673 the local submatrix with either d_nz or d_nnz (not both). Set 1674 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1675 memory allocation. Likewise, specify preallocated storage for the 1676 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1677 1678 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1679 the figure below we depict these three local rows and all columns (0-11). 1680 1681 .vb 1682 0 1 2 3 4 5 6 7 8 9 10 11 1683 ------------------- 1684 row 3 | o o o d d d o o o o o o 1685 row 4 | o o o d d d o o o o o o 1686 row 5 | o o o d d d o o o o o o 1687 ------------------- 1688 .ve 1689 1690 Thus, any entries in the d locations are stored in the d (diagonal) 1691 submatrix, and any entries in the o locations are stored in the 1692 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1693 stored simply in the MATSEQBAIJ format for compressed row storage. 1694 1695 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1696 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1697 In general, for PDE problems in which most nonzeros are near the diagonal, 1698 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1699 or you will get TERRIBLE performance; see the users' manual chapter on 1700 matrices. 1701 1702 Level: intermediate 1703 1704 .keywords: matrix, block, aij, compressed row, sparse, parallel 1705 1706 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1707 @*/ 1708 1709 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1710 { 1711 Mat_MPISBAIJ *b; 1712 int ierr,i,mbs,Mbs; 1713 PetscTruth flg2; 1714 1715 PetscFunctionBegin; 1716 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr); 1717 if (!flg2) PetscFunctionReturn(0); 1718 1719 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1720 1721 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1722 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1723 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1724 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1725 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1726 if (d_nnz) { 1727 for (i=0; i<B->m/bs; i++) { 1728 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1729 } 1730 } 1731 if (o_nnz) { 1732 for (i=0; i<B->m/bs; i++) { 1733 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1734 } 1735 } 1736 B->preallocated = PETSC_TRUE; 1737 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1738 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1739 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1740 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1741 1742 b = (Mat_MPISBAIJ*)B->data; 1743 mbs = B->m/bs; 1744 Mbs = B->M/bs; 1745 if (mbs*bs != B->m) { 1746 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1747 } 1748 1749 b->bs = bs; 1750 b->bs2 = bs*bs; 1751 b->mbs = mbs; 1752 b->nbs = mbs; 1753 b->Mbs = Mbs; 1754 b->Nbs = Mbs; 1755 1756 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1757 b->rowners[0] = 0; 1758 for (i=2; i<=b->size; i++) { 1759 b->rowners[i] += b->rowners[i-1]; 1760 } 1761 b->rstart = b->rowners[b->rank]; 1762 b->rend = b->rowners[b->rank+1]; 1763 b->cstart = b->rstart; 1764 b->cend = b->rend; 1765 for (i=0; i<=b->size; i++) { 1766 b->rowners_bs[i] = b->rowners[i]*bs; 1767 } 1768 b->rstart_bs = b-> rstart*bs; 1769 b->rend_bs = b->rend*bs; 1770 1771 b->cstart_bs = b->cstart*bs; 1772 b->cend_bs = b->cend*bs; 1773 1774 1775 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1776 PetscLogObjectParent(B,b->A); 1777 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1778 PetscLogObjectParent(B,b->B); 1779 1780 /* build cache for off array entries formed */ 1781 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1782 1783 PetscFunctionReturn(0); 1784 } 1785 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "MatCreateMPISBAIJ" 1788 /*@C 1789 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1790 (block compressed row). For good matrix assembly performance 1791 the user should preallocate the matrix storage by setting the parameters 1792 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1793 performance can be increased by more than a factor of 50. 1794 1795 Collective on MPI_Comm 1796 1797 Input Parameters: 1798 + comm - MPI communicator 1799 . bs - size of blockk 1800 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1801 This value should be the same as the local size used in creating the 1802 y vector for the matrix-vector product y = Ax. 1803 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1804 This value should be the same as the local size used in creating the 1805 x vector for the matrix-vector product y = Ax. 1806 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1807 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1808 . d_nz - number of block nonzeros per block row in diagonal portion of local 1809 submatrix (same for all local rows) 1810 . d_nnz - array containing the number of block nonzeros in the various block rows 1811 of the in diagonal portion of the local (possibly different for each block 1812 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1813 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1814 submatrix (same for all local rows). 1815 - o_nnz - array containing the number of nonzeros in the various block rows of the 1816 off-diagonal portion of the local submatrix (possibly different for 1817 each block row) or PETSC_NULL. 1818 1819 Output Parameter: 1820 . A - the matrix 1821 1822 Options Database Keys: 1823 . -mat_no_unroll - uses code that does not unroll the loops in the 1824 block calculations (much slower) 1825 . -mat_block_size - size of the blocks to use 1826 . -mat_mpi - use the parallel matrix data structures even on one processor 1827 (defaults to using SeqBAIJ format on one processor) 1828 1829 Notes: 1830 The user MUST specify either the local or global matrix dimensions 1831 (possibly both). 1832 1833 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1834 than it must be used on all processors that share the object for that argument. 1835 1836 Storage Information: 1837 For a square global matrix we define each processor's diagonal portion 1838 to be its local rows and the corresponding columns (a square submatrix); 1839 each processor's off-diagonal portion encompasses the remainder of the 1840 local matrix (a rectangular submatrix). 1841 1842 The user can specify preallocated storage for the diagonal part of 1843 the local submatrix with either d_nz or d_nnz (not both). Set 1844 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1845 memory allocation. Likewise, specify preallocated storage for the 1846 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1847 1848 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1849 the figure below we depict these three local rows and all columns (0-11). 1850 1851 .vb 1852 0 1 2 3 4 5 6 7 8 9 10 11 1853 ------------------- 1854 row 3 | o o o d d d o o o o o o 1855 row 4 | o o o d d d o o o o o o 1856 row 5 | o o o d d d o o o o o o 1857 ------------------- 1858 .ve 1859 1860 Thus, any entries in the d locations are stored in the d (diagonal) 1861 submatrix, and any entries in the o locations are stored in the 1862 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1863 stored simply in the MATSEQBAIJ format for compressed row storage. 1864 1865 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1866 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1867 In general, for PDE problems in which most nonzeros are near the diagonal, 1868 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1869 or you will get TERRIBLE performance; see the users' manual chapter on 1870 matrices. 1871 1872 Level: intermediate 1873 1874 .keywords: matrix, block, aij, compressed row, sparse, parallel 1875 1876 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1877 @*/ 1878 1879 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1880 { 1881 int ierr,size; 1882 1883 PetscFunctionBegin; 1884 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1886 if (size > 1) { 1887 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1888 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1889 } else { 1890 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1891 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1899 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1900 { 1901 Mat mat; 1902 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1903 int ierr,len=0; 1904 1905 PetscFunctionBegin; 1906 *newmat = 0; 1907 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1908 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1909 mat->preallocated = PETSC_TRUE; 1910 a = (Mat_MPISBAIJ*)mat->data; 1911 a->bs = oldmat->bs; 1912 a->bs2 = oldmat->bs2; 1913 a->mbs = oldmat->mbs; 1914 a->nbs = oldmat->nbs; 1915 a->Mbs = oldmat->Mbs; 1916 a->Nbs = oldmat->Nbs; 1917 1918 a->rstart = oldmat->rstart; 1919 a->rend = oldmat->rend; 1920 a->cstart = oldmat->cstart; 1921 a->cend = oldmat->cend; 1922 a->size = oldmat->size; 1923 a->rank = oldmat->rank; 1924 a->donotstash = oldmat->donotstash; 1925 a->roworiented = oldmat->roworiented; 1926 a->rowindices = 0; 1927 a->rowvalues = 0; 1928 a->getrowactive = PETSC_FALSE; 1929 a->barray = 0; 1930 a->rstart_bs = oldmat->rstart_bs; 1931 a->rend_bs = oldmat->rend_bs; 1932 a->cstart_bs = oldmat->cstart_bs; 1933 a->cend_bs = oldmat->cend_bs; 1934 1935 /* hash table stuff */ 1936 a->ht = 0; 1937 a->hd = 0; 1938 a->ht_size = 0; 1939 a->ht_flag = oldmat->ht_flag; 1940 a->ht_fact = oldmat->ht_fact; 1941 a->ht_total_ct = 0; 1942 a->ht_insert_ct = 0; 1943 1944 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1945 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1946 a->cowners = a->rowners + a->size + 2; 1947 a->rowners_bs = a->cowners + a->size + 2; 1948 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1949 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1950 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1951 if (oldmat->colmap) { 1952 #if defined (PETSC_USE_CTABLE) 1953 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1954 #else 1955 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1956 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1957 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 1958 #endif 1959 } else a->colmap = 0; 1960 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1961 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 1962 PetscLogObjectMemory(mat,len*sizeof(int)); 1963 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 1964 } else a->garray = 0; 1965 1966 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1967 PetscLogObjectParent(mat,a->lvec); 1968 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1969 1970 PetscLogObjectParent(mat,a->Mvctx); 1971 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1972 PetscLogObjectParent(mat,a->A); 1973 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1974 PetscLogObjectParent(mat,a->B); 1975 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 1976 *newmat = mat; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #include "petscsys.h" 1981 1982 EXTERN_C_BEGIN 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "MatLoad_MPISBAIJ" 1985 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1986 { 1987 Mat A; 1988 int i,nz,ierr,j,rstart,rend,fd; 1989 PetscScalar *vals,*buf; 1990 MPI_Comm comm = ((PetscObject)viewer)->comm; 1991 MPI_Status status; 1992 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1993 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 1994 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 1995 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1996 int dcount,kmax,k,nzcount,tmp; 1997 1998 PetscFunctionBegin; 1999 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2000 2001 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2002 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2003 if (!rank) { 2004 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2005 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2006 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2007 if (header[3] < 0) { 2008 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2009 } 2010 } 2011 2012 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2013 M = header[1]; N = header[2]; 2014 2015 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2016 2017 /* 2018 This code adds extra rows to make sure the number of rows is 2019 divisible by the blocksize 2020 */ 2021 Mbs = M/bs; 2022 extra_rows = bs - M + bs*(Mbs); 2023 if (extra_rows == bs) extra_rows = 0; 2024 else Mbs++; 2025 if (extra_rows &&!rank) { 2026 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2027 } 2028 2029 /* determine ownership of all rows */ 2030 mbs = Mbs/size + ((Mbs % size) > rank); 2031 m = mbs*bs; 2032 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2033 browners = rowners + size + 1; 2034 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2035 rowners[0] = 0; 2036 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2037 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2038 rstart = rowners[rank]; 2039 rend = rowners[rank+1]; 2040 2041 /* distribute row lengths to all processors */ 2042 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2043 if (!rank) { 2044 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2045 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2046 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2047 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2048 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2049 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2050 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2051 } else { 2052 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2053 } 2054 2055 if (!rank) { /* procs[0] */ 2056 /* calculate the number of nonzeros on each processor */ 2057 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2058 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2059 for (i=0; i<size; i++) { 2060 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2061 procsnz[i] += rowlengths[j]; 2062 } 2063 } 2064 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2065 2066 /* determine max buffer needed and allocate it */ 2067 maxnz = 0; 2068 for (i=0; i<size; i++) { 2069 maxnz = PetscMax(maxnz,procsnz[i]); 2070 } 2071 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2072 2073 /* read in my part of the matrix column indices */ 2074 nz = procsnz[0]; 2075 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2076 mycols = ibuf; 2077 if (size == 1) nz -= extra_rows; 2078 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2079 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2080 2081 /* read in every ones (except the last) and ship off */ 2082 for (i=1; i<size-1; i++) { 2083 nz = procsnz[i]; 2084 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2085 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2086 } 2087 /* read in the stuff for the last proc */ 2088 if (size != 1) { 2089 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2090 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2091 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2092 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2093 } 2094 ierr = PetscFree(cols);CHKERRQ(ierr); 2095 } else { /* procs[i], i>0 */ 2096 /* determine buffer space needed for message */ 2097 nz = 0; 2098 for (i=0; i<m; i++) { 2099 nz += locrowlens[i]; 2100 } 2101 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2102 mycols = ibuf; 2103 /* receive message of column indices*/ 2104 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2105 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2106 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2107 } 2108 2109 /* loop over local rows, determining number of off diagonal entries */ 2110 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2111 odlens = dlens + (rend-rstart); 2112 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2113 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2114 masked1 = mask + Mbs; 2115 masked2 = masked1 + Mbs; 2116 rowcount = 0; nzcount = 0; 2117 for (i=0; i<mbs; i++) { 2118 dcount = 0; 2119 odcount = 0; 2120 for (j=0; j<bs; j++) { 2121 kmax = locrowlens[rowcount]; 2122 for (k=0; k<kmax; k++) { 2123 tmp = mycols[nzcount++]/bs; /* block col. index */ 2124 if (!mask[tmp]) { 2125 mask[tmp] = 1; 2126 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2127 else masked1[dcount++] = tmp; /* entry in diag portion */ 2128 } 2129 } 2130 rowcount++; 2131 } 2132 2133 dlens[i] = dcount; /* d_nzz[i] */ 2134 odlens[i] = odcount; /* o_nzz[i] */ 2135 2136 /* zero out the mask elements we set */ 2137 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2138 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2139 } 2140 2141 /* create our matrix */ 2142 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2143 CHKERRQ(ierr); 2144 A = *newmat; 2145 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2146 2147 if (!rank) { 2148 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2149 /* read in my part of the matrix numerical values */ 2150 nz = procsnz[0]; 2151 vals = buf; 2152 mycols = ibuf; 2153 if (size == 1) nz -= extra_rows; 2154 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2155 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2156 2157 /* insert into matrix */ 2158 jj = rstart*bs; 2159 for (i=0; i<m; i++) { 2160 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2161 mycols += locrowlens[i]; 2162 vals += locrowlens[i]; 2163 jj++; 2164 } 2165 2166 /* read in other processors (except the last one) and ship out */ 2167 for (i=1; i<size-1; i++) { 2168 nz = procsnz[i]; 2169 vals = buf; 2170 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2171 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2172 } 2173 /* the last proc */ 2174 if (size != 1){ 2175 nz = procsnz[i] - extra_rows; 2176 vals = buf; 2177 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2178 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2179 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2180 } 2181 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2182 2183 } else { 2184 /* receive numeric values */ 2185 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2186 2187 /* receive message of values*/ 2188 vals = buf; 2189 mycols = ibuf; 2190 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2191 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2192 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2193 2194 /* insert into matrix */ 2195 jj = rstart*bs; 2196 for (i=0; i<m; i++) { 2197 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2198 mycols += locrowlens[i]; 2199 vals += locrowlens[i]; 2200 jj++; 2201 } 2202 } 2203 2204 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2205 ierr = PetscFree(buf);CHKERRQ(ierr); 2206 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2207 ierr = PetscFree(rowners);CHKERRQ(ierr); 2208 ierr = PetscFree(dlens);CHKERRQ(ierr); 2209 ierr = PetscFree(mask);CHKERRQ(ierr); 2210 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2211 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 EXTERN_C_END 2215 2216 #undef __FUNCT__ 2217 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2218 /*@ 2219 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2220 2221 Input Parameters: 2222 . mat - the matrix 2223 . fact - factor 2224 2225 Collective on Mat 2226 2227 Level: advanced 2228 2229 Notes: 2230 This can also be set by the command line option: -mat_use_hash_table fact 2231 2232 .keywords: matrix, hashtable, factor, HT 2233 2234 .seealso: MatSetOption() 2235 @*/ 2236 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2237 { 2238 PetscFunctionBegin; 2239 SETERRQ(1,"Function not yet written for SBAIJ format"); 2240 /* PetscFunctionReturn(0); */ 2241 } 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2245 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2246 { 2247 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2248 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2249 PetscReal atmp; 2250 PetscReal *work,*svalues,*rvalues; 2251 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2252 int rank,size,*rowners_bs,dest,count,source; 2253 PetscScalar *va; 2254 MatScalar *ba; 2255 MPI_Status stat; 2256 2257 PetscFunctionBegin; 2258 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2259 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2260 2261 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 2262 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 2263 2264 bs = a->bs; 2265 mbs = a->mbs; 2266 Mbs = a->Mbs; 2267 ba = b->a; 2268 bi = b->i; 2269 bj = b->j; 2270 /* 2271 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2272 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2273 */ 2274 2275 /* find ownerships */ 2276 rowners_bs = a->rowners_bs; 2277 /* 2278 if (!rank){ 2279 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2280 } 2281 */ 2282 2283 /* each proc creates an array to be distributed */ 2284 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2285 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2286 2287 /* row_max for B */ 2288 if (rank != size-1){ 2289 for (i=0; i<mbs; i++) { 2290 ncols = bi[1] - bi[0]; bi++; 2291 brow = bs*i; 2292 for (j=0; j<ncols; j++){ 2293 bcol = bs*(*bj); 2294 for (kcol=0; kcol<bs; kcol++){ 2295 col = bcol + kcol; /* local col index */ 2296 col += rowners_bs[rank+1]; /* global col index */ 2297 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2298 for (krow=0; krow<bs; krow++){ 2299 atmp = PetscAbsScalar(*ba); ba++; 2300 row = brow + krow; /* local row index */ 2301 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2302 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2303 if (work[col] < atmp) work[col] = atmp; 2304 } 2305 } 2306 bj++; 2307 } 2308 } 2309 /* 2310 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2311 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2312 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2313 */ 2314 2315 /* send values to its owners */ 2316 for (dest=rank+1; dest<size; dest++){ 2317 svalues = work + rowners_bs[dest]; 2318 count = rowners_bs[dest+1]-rowners_bs[dest]; 2319 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr); 2320 /* 2321 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2322 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2323 */ 2324 } 2325 } 2326 2327 /* receive values */ 2328 if (rank){ 2329 rvalues = work; 2330 count = rowners_bs[rank+1]-rowners_bs[rank]; 2331 for (source=0; source<rank; source++){ 2332 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr); 2333 /* process values */ 2334 for (i=0; i<count; i++){ 2335 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2336 } 2337 /* 2338 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2339 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2340 */ 2341 } 2342 } 2343 2344 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2345 ierr = PetscFree(work);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatRelax_MPISBAIJ" 2351 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2352 { 2353 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2354 int ierr; 2355 PetscScalar mone=-1.0; 2356 Vec lvec1,bb1; 2357 2358 PetscFunctionBegin; 2359 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2360 if (mat->bs > 1) 2361 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2362 2363 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2364 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2365 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 2366 its--; 2367 } 2368 2369 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2370 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2371 while (its--){ 2372 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2373 2374 /* lower diagonal part: bb1 = bb - B^T*xx */ 2375 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2376 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2377 2378 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2379 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2380 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2381 2382 /* upper diagonal part: bb1 = bb1 - B*x */ 2383 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2384 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2385 2386 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2387 2388 /* diagonal sweep */ 2389 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 2390 } 2391 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2392 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2393 } else { 2394 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399