1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 extern int MatUseSpooles_MPISBAIJ(Mat); 24 25 /* UGLY, ugly, ugly 26 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 27 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 28 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 29 converts the entries into single precision and then calls ..._MatScalar() to put them 30 into the single precision data structures. 31 */ 32 #if defined(PETSC_USE_MAT_SINGLE) 33 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 36 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 37 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 38 #else 39 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 40 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 41 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 42 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 43 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 44 #endif 45 46 EXTERN_C_BEGIN 47 #undef __FUNCT__ 48 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 49 int MatStoreValues_MPISBAIJ(Mat mat) 50 { 51 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 52 int ierr; 53 54 PetscFunctionBegin; 55 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 56 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 EXTERN_C_END 60 61 EXTERN_C_BEGIN 62 #undef __FUNCT__ 63 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 64 int MatRetrieveValues_MPISBAIJ(Mat mat) 65 { 66 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 67 int ierr; 68 69 PetscFunctionBegin; 70 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 71 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 EXTERN_C_END 75 76 /* 77 Local utility routine that creates a mapping from the global column 78 number to the local number in the off-diagonal part of the local 79 storage of the matrix. 
This is done in a non scable way since the 80 length of colmap equals the global matrix length. 81 */ 82 #undef __FUNCT__ 83 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 84 static int CreateColmap_MPISBAIJ_Private(Mat mat) 85 { 86 PetscFunctionBegin; 87 SETERRQ(1,"Function not yet written for SBAIJ format"); 88 /* PetscFunctionReturn(0); */ 89 } 90 91 #define CHUNKSIZE 10 92 93 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 94 { \ 95 \ 96 brow = row/bs; \ 97 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 98 rmax = aimax[brow]; nrow = ailen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto a_noinsert; \ 114 } \ 115 } \ 116 if (a->nonew == 1) goto a_noinsert; \ 117 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 118 if (nrow >= rmax) { \ 119 /* there is no extra room in row, therefore enlarge */ \ 120 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 121 MatScalar *new_a; \ 122 \ 123 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 124 \ 125 /* malloc new storage space */ \ 126 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 127 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 128 new_j = (int*)(new_a + bs2*new_nz); \ 129 new_i = new_j + new_nz; \ 130 \ 131 /* copy over old data into new slots */ \ 132 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 133 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 134 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 135 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 136 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 137 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 139 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 140 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 141 /* free up old matrix storage */ \ 142 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 143 if (!a->singlemalloc) { \ 144 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 145 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 146 } \ 147 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 148 a->singlemalloc = PETSC_TRUE; \ 149 \ 150 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 151 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 152 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 153 a->s_maxnz += bs2*CHUNKSIZE; \ 154 a->reallocs++; \ 155 a->s_nz++; \ 156 } \ 157 N = nrow++ - 1; \ 158 /* shift up all the later entries in this row */ \ 159 for (ii=N; ii>=_i; ii--) { \ 160 rp[ii+1] = rp[ii]; \ 161 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 162 } \ 163 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 164 rp[_i] = bcol; \ 165 ap[bs2*_i + bs*cidx + ridx] = value; \ 166 a_noinsert:; \ 167 ailen[brow] = nrow; \ 168 } 169 #ifndef MatSetValues_SeqBAIJ_B_Private 170 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 
171 { \ 172 brow = row/bs; \ 173 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 174 rmax = bimax[brow]; nrow = bilen[brow]; \ 175 bcol = col/bs; \ 176 ridx = row % bs; cidx = col % bs; \ 177 low = 0; high = nrow; \ 178 while (high-low > 3) { \ 179 t = (low+high)/2; \ 180 if (rp[t] > bcol) high = t; \ 181 else low = t; \ 182 } \ 183 for (_i=low; _i<high; _i++) { \ 184 if (rp[_i] > bcol) break; \ 185 if (rp[_i] == bcol) { \ 186 bap = ap + bs2*_i + bs*cidx + ridx; \ 187 if (addv == ADD_VALUES) *bap += value; \ 188 else *bap = value; \ 189 goto b_noinsert; \ 190 } \ 191 } \ 192 if (b->nonew == 1) goto b_noinsert; \ 193 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 194 if (nrow >= rmax) { \ 195 /* there is no extra room in row, therefore enlarge */ \ 196 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 197 MatScalar *new_a; \ 198 \ 199 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 200 \ 201 /* malloc new storage space */ \ 202 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 203 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 204 new_j = (int*)(new_a + bs2*new_nz); \ 205 new_i = new_j + new_nz; \ 206 \ 207 /* copy over old data into new slots */ \ 208 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 209 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 210 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 211 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 212 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 213 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 214 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 215 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 216 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 217 /* free up old matrix storage */ \ 218 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 219 if (!b->singlemalloc) { \ 220 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 221 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 222 } \ 223 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 224 b->singlemalloc = PETSC_TRUE; \ 225 \ 226 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 227 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 228 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 229 b->maxnz += bs2*CHUNKSIZE; \ 230 b->reallocs++; \ 231 b->nz++; \ 232 } \ 233 N = nrow++ - 1; \ 234 /* shift up all the later entries in this row */ \ 235 for (ii=N; ii>=_i; ii--) { \ 236 rp[ii+1] = rp[ii]; \ 237 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 238 } \ 239 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 240 rp[_i] = bcol; \ 241 ap[bs2*_i + bs*cidx + ridx] = value; \ 242 b_noinsert:; \ 243 bilen[brow] = nrow; \ 244 } 245 #endif 246 247 #if defined(PETSC_USE_MAT_SINGLE) 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatSetValues_MPISBAIJ" 250 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 251 { 252 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 253 int ierr,i,N = m*n; 254 MatScalar *vsingle; 255 256 PetscFunctionBegin; 257 if (N > b->setvalueslen) { 258 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 259 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 260 b->setvalueslen = N; 261 } 
262 vsingle = b->setvaluescopy;
263
264 for (i=0; i<N; i++) {
265 vsingle[i] = v[i];
266 }
267 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
268 PetscFunctionReturn(0);
269 }
270
271 #undef __FUNCT__
272 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
273 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
274 {
275 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
276 int ierr,i,N = m*n*b->bs2;
277 MatScalar *vsingle;
278
279 PetscFunctionBegin;
280 if (N > b->setvalueslen) {
281 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
282 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
283 b->setvalueslen = N;
284 }
285 vsingle = b->setvaluescopy;
286 for (i=0; i<N; i++) {
287 vsingle[i] = v[i];
288 }
289 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
290 PetscFunctionReturn(0);
291 }
292
293 #undef __FUNCT__
294 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
295 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
296 {
297 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
298 int ierr,i,N = m*n;
299 MatScalar *vsingle;
300
301 PetscFunctionBegin;
302 SETERRQ(1,"Function not yet written for SBAIJ format");
303 /* PetscFunctionReturn(0); */
304 }
305
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
308 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
309 {
310 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
311 int ierr,i,N = m*n*b->bs2;
312 MatScalar *vsingle;
313
314 PetscFunctionBegin;
315 SETERRQ(1,"Function not yet written for SBAIJ format");
316 /* PetscFunctionReturn(0); */
317 }
318 #endif
319
320 /* Only add/insert a(i,j) with i<=j (blocks).
321 Any a(i,j) with i>j input by the user is ignored.
322 */ 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatSetValues_MPIBAIJ" 325 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 326 { 327 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 328 MatScalar value; 329 PetscTruth roworiented = baij->roworiented; 330 int ierr,i,j,row,col; 331 int rstart_orig=baij->rstart_bs; 332 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 333 int cend_orig=baij->cend_bs,bs=baij->bs; 334 335 /* Some Variables required in the macro */ 336 Mat A = baij->A; 337 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 338 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 339 MatScalar *aa=a->a; 340 341 Mat B = baij->B; 342 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 343 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 344 MatScalar *ba=b->a; 345 346 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 347 int low,high,t,ridx,cidx,bs2=a->bs2; 348 MatScalar *ap,*bap; 349 350 /* for stash */ 351 int n_loc, *in_loc=0; 352 MatScalar *v_loc=0; 353 354 PetscFunctionBegin; 355 356 if(!baij->donotstash){ 357 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 358 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 359 } 360 361 for (i=0; i<m; i++) { 362 if (im[i] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 365 #endif 366 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 367 row = im[i] - rstart_orig; /* local row index */ 368 for (j=0; j<n; j++) { 369 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 370 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 371 col = in[j] - cstart_orig; /* local col index */ 372 brow = row/bs; bcol = col/bs; 373 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 374 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 375 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 376 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 377 } else if (in[j] < 0) continue; 378 #if defined(PETSC_USE_BOPT_g) 379 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 380 #endif 381 else { /* off-diag entry (B) */ 382 if (mat->was_assembled) { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 388 col = col - 1; 389 #else 390 col = baij->colmap[in[j]/bs] - 1; 391 #endif 392 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 393 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 394 col = in[j]; 395 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 396 B = baij->B; 397 b = (Mat_SeqBAIJ*)(B)->data; 398 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 399 ba=b->a; 400 } else col += in[j]%bs; 401 } else col = in[j]; 402 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 403 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 404 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 405 } 406 } 407 } else { /* off processor entry */ 408 if (!baij->donotstash) { 409 n_loc = 0; 410 for (j=0; j<n; j++){ 411 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 412 in_loc[n_loc] = in[j]; 413 if (roworiented) { 414 v_loc[n_loc] = v[i*n+j]; 415 } else { 416 v_loc[n_loc] = v[j*m+i]; 417 } 418 n_loc++; 419 } 420 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 421 } 422 } 423 } 424 425 if(!baij->donotstash){ 426 ierr = PetscFree(in_loc);CHKERRQ(ierr); 427 ierr = PetscFree(v_loc);CHKERRQ(ierr); 428 } 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 434 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 435 { 436 PetscFunctionBegin; 437 SETERRQ(1,"Function not yet written for SBAIJ format"); 438 /* PetscFunctionReturn(0); */ 439 } 440 441 #define HASH_KEY 0.6180339887 442 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 443 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 444 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 447 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 448 { 449 PetscFunctionBegin; 450 SETERRQ(1,"Function not yet written for SBAIJ format"); 451 /* PetscFunctionReturn(0); */ 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 456 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 457 { 458 PetscFunctionBegin; 459 SETERRQ(1,"Function not yet written for SBAIJ format"); 460 /* PetscFunctionReturn(0); */ 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatGetValues_MPISBAIJ" 465 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 466 { 467 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 468 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 469 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 470 471 PetscFunctionBegin; 472 for (i=0; i<m; i++) { 473 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 474 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 475 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 476 row = idxm[i] - bsrstart; 477 for (j=0; j<n; j++) { 478 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 479 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 480 if (idxn[j] >= bscstart && idxn[j] < bscend){ 481 col = idxn[j] - bscstart; 482 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 483 } else { 484 if (!baij->colmap) { 485 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 486 } 487 #if defined (PETSC_USE_CTABLE) 488 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 489 data --; 490 #else 491 data = baij->colmap[idxn[j]/bs]-1; 492 #endif 493 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 494 else { 495 col = data + idxn[j]%bs; 496 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 497 } 498 } 499 } 500 } else { 501 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 502 } 503 } 504 PetscFunctionReturn(0); 505 } 506 507 #undef __FUNCT__ 508 #define __FUNCT__ "MatNorm_MPISBAIJ" 509 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 510 { 511 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 512 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 513 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 514 int ierr; 515 PetscReal sum[2],*lnorm2; 516 517 PetscFunctionBegin; 518 if (baij->size == 1) { 519 ierr = 
MatNorm(baij->A,type,norm);CHKERRQ(ierr);
520 } else {
521 if (type == NORM_FROBENIUS) {
522 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
523 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
524 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
525 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
526 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
527 /*
528 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
529 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
530 */
531 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532 /*
533 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
534 PetscSynchronizedFlush(PETSC_COMM_WORLD); */
535
536 *norm = sqrt(sum[0] + 2*sum[1]);
537 ierr = PetscFree(lnorm2);CHKERRQ(ierr);
538 } else {
539 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
540 }
541 }
542 PetscFunctionReturn(0);
543 }
544
545 /*
546 Creates the hash table, and sets the table.
547 This table is created only once.
548 If new entries need to be added to the matrix
549 then the hash table has to be destroyed and
550 recreated.
551 */
552 #undef __FUNCT__
553 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
554 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
555 {
556 PetscFunctionBegin;
557 SETERRQ(1,"Function not yet written for SBAIJ format");
558 /* PetscFunctionReturn(0); */
559 }
560
561 #undef __FUNCT__
562 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
563 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
564 {
565 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
566 int ierr,nstash,reallocs;
567 InsertMode addv;
568
569 PetscFunctionBegin;
570 if (baij->donotstash) {
571 PetscFunctionReturn(0);
572 }
573
574 /* make sure all processors are either in INSERTMODE or ADDMODE */
575 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
576 if (addv == (ADD_VALUES|INSERT_VALUES)) {
577 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
578 }
579 mat->insertmode = addv; /* in case this processor had no cache */
580
581 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
582 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
583 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
584 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
585 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
586 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
587 PetscFunctionReturn(0);
588 }
589
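/*
   Illustrative assembly sequence as seen from user code (a sketch only; the matrix
   name A and the index/value variables below are hypothetical, not part of this file):

      ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);   (repeated; off-processor
                                                                            entries go into the stashes)
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);        (MatAssemblyBegin_MPISBAIJ above
                                                                            starts sending the stashed entries)
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          (MatAssemblyEnd_MPISBAIJ below
                                                                            receives and inserts them)
*/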
590 #undef __FUNCT__
591 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
592 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
593 {
594 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
595 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
596 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data;
597 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
598 int *row,*col,other_disassembled;
599 PetscTruth r1,r2,r3;
600 MatScalar *val;
601 InsertMode addv = mat->insertmode;
602 #if defined(PETSC_HAVE_SPOOLES)
603 PetscTruth flag;
604 #endif
605
606 PetscFunctionBegin;
607
608 if (!baij->donotstash) {
609 while (1) {
610 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
611 /*
612 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
613 PetscSynchronizedFlush(PETSC_COMM_WORLD);
614 */
615 if (!flg) break;
616
617 for (i=0; i<n;) {
618 /* Now identify the consecutive vals belonging to the same row */
619 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
620 if (j < n) ncols = j-i;
621 else ncols = n-i;
622 /* Now assemble all these values with a single function call */
623 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
624 i = j;
625 }
626 }
627 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
628 /* Now process the block-stash. Since the values are stashed column-oriented,
629 set the roworiented flag to column oriented, and after MatSetValues()
630 restore the original flags */
631 r1 = baij->roworiented;
632 r2 = a->roworiented;
633 r3 = b->roworiented;
634 baij->roworiented = PETSC_FALSE;
635 a->roworiented = PETSC_FALSE;
636 b->roworiented = PETSC_FALSE;
637 while (1) {
638 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
639 if (!flg) break;
640
641 for (i=0; i<n;) {
642 /* Now identify the consecutive vals belonging to the same row */
643 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
644 if (j < n) ncols = j-i;
645 else ncols = n-i;
646 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
647 i = j;
648 }
649 }
650 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
651 baij->roworiented = r1;
652 a->roworiented = r2;
653 b->roworiented = r3;
654 }
655
656 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
657 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
658
659 /* determine if any processor has disassembled, if so we must
660 also disassemble ourselves, in order that we may reassemble.
*/ 661 /* 662 if nonzero structure of submatrix B cannot change then we know that 663 no processor disassembled thus we can skip this stuff 664 */ 665 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 666 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 667 if (mat->was_assembled && !other_disassembled) { 668 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 669 } 670 } 671 672 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 673 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 674 } 675 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 676 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 677 678 #if defined(PETSC_USE_BOPT_g) 679 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 680 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 681 baij->ht_total_ct = 0; 682 baij->ht_insert_ct = 0; 683 } 684 #endif 685 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 686 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 687 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 688 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 689 } 690 691 if (baij->rowvalues) { 692 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 693 baij->rowvalues = 0; 694 } 695 696 #if defined(PETSC_HAVE_SPOOLES) 697 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 698 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 699 #endif 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 705 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 706 { 707 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 708 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 709 PetscTruth isascii,isdraw; 710 PetscViewer sviewer; 711 PetscViewerFormat format; 712 713 PetscFunctionBegin; 714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 716 if (isascii) { 717 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 718 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 719 MatInfo info; 720 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 721 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 722 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 723 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 724 baij->bs,(int)info.memory);CHKERRQ(ierr); 725 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 726 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 727 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 728 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 729 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 730 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } else if (format == PETSC_VIEWER_ASCII_INFO) { 733 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 } 737 738 if (isdraw) { 739 PetscDraw draw; 740 PetscTruth isnull; 741 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 742 ierr = 
PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 743 } 744 745 if (size == 1) { 746 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 747 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 748 } else { 749 /* assemble the entire matrix onto first processor. */ 750 Mat A; 751 Mat_SeqSBAIJ *Aloc; 752 Mat_SeqBAIJ *Bloc; 753 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 754 MatScalar *a; 755 756 if (!rank) { 757 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 758 } else { 759 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 760 } 761 PetscLogObjectParent(mat,A); 762 763 /* copy over the A part */ 764 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 765 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 766 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 767 768 for (i=0; i<mbs; i++) { 769 rvals[0] = bs*(baij->rstart + i); 770 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 771 for (j=ai[i]; j<ai[i+1]; j++) { 772 col = (baij->cstart+aj[j])*bs; 773 for (k=0; k<bs; k++) { 774 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 775 col++; a += bs; 776 } 777 } 778 } 779 /* copy over the B part */ 780 Bloc = (Mat_SeqBAIJ*)baij->B->data; 781 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 782 for (i=0; i<mbs; i++) { 783 rvals[0] = bs*(baij->rstart + i); 784 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 785 for (j=ai[i]; j<ai[i+1]; j++) { 786 col = baij->garray[aj[j]]*bs; 787 for (k=0; k<bs; k++) { 788 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 789 col++; a += bs; 790 } 791 } 792 } 793 ierr = PetscFree(rvals);CHKERRQ(ierr); 794 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 795 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 796 /* 797 Everyone has to call to draw the matrix since the graphics waits are 798 synchronized across all processors that share the PetscDraw object 799 */ 800 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 801 if (!rank) { 802 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 803 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 804 } 805 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 806 ierr = MatDestroy(A);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatView_MPISBAIJ" 813 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 814 { 815 int ierr; 816 PetscTruth isascii,isdraw,issocket,isbinary; 817 818 PetscFunctionBegin; 819 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 820 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 821 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 822 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 823 if (isascii || isdraw || issocket || isbinary) { 824 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 825 } else { 826 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 827 } 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatDestroy_MPISBAIJ" 833 int MatDestroy_MPISBAIJ(Mat mat) 834 { 835 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 836 int ierr; 837 838 PetscFunctionBegin; 
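/* Release everything created during matrix setup and assembly: the two sequential blocks
   (A = the diagonal portion stored as upper triangle, B = the off-diagonal portion),
   both stashes, the column map and garray, the scatter contexts, and the work vectors. */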
839 #if defined(PETSC_USE_LOG)
840 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
841 #endif
842 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
843 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
844 ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
845 ierr = MatDestroy(baij->A);CHKERRQ(ierr);
846 ierr = MatDestroy(baij->B);CHKERRQ(ierr);
847 #if defined (PETSC_USE_CTABLE)
848 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
849 #else
850 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
851 #endif
852 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
853 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
854 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
855 if (baij->slvec0) {
856 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
857 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
858 }
859 if (baij->slvec1) {
860 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
861 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
862 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
863 }
864 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
865 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
866 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
867 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
868 #if defined(PETSC_USE_MAT_SINGLE)
869 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
870 #endif
871 ierr = PetscFree(baij);CHKERRQ(ierr);
872 PetscFunctionReturn(0);
873 }
874
875 #undef __FUNCT__
876 #define __FUNCT__ "MatMult_MPISBAIJ"
877 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
878 {
879 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
880 int ierr,nt,mbs=a->mbs,bs=a->bs;
881 PetscScalar *x,*from,zero=0.0;
882
883 PetscFunctionBegin;
884 /*
885 PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
886 PetscSynchronizedFlush(PETSC_COMM_WORLD);
887 */
888 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
889 if (nt != A->n) {
890 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
891 }
892 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
893 if (nt != A->m) {
894 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
895 }
896
897 /* diagonal part */
898 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
899 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
900
901 /* subdiagonal part */
902 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
903
904 /* copy x into the vec slvec0 */
905 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
907 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
908 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
909
910 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
911 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
912 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
913
914 /* superdiagonal part */
915 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
916
917 PetscFunctionReturn(0);
918 }
919
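/*
   Note on the two variants (descriptive comment added for clarity):
   MatMult_MPISBAIJ() above computes y = A*x with a single communication step. It forms
   A*x_local in slvec1a and B^T*x_local in slvec0b, copies x_local into the leading part
   of slvec0, performs one forward scatter-add of slvec0 into slvec1 through sMvctx, and
   finishes with y = slvec1a + B*slvec1b.  MatMult_MPISBAIJ_2comm() below is the older
   variant that communicates twice through Mvctx: a forward scatter of x for the
   superdiagonal part and a reverse scatter-add of B^T*x for the subdiagonal part.
*/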
920 #undef __FUNCT__
921 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
922 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
923 {
924 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
925 int ierr,nt;
926
927 PetscFunctionBegin;
928 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
929 if (nt != A->n) {
930 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
931 }
932 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
933 if (nt != A->m) {
934 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
935 }
936
937 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
938 /* do diagonal part */
939 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
940 /* do superdiagonal part */
941 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
942 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
943 /* do subdiagonal part */
944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
945 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
946 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
947
948 PetscFunctionReturn(0);
949 }
950
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
953 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
954 {
955 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
956 int ierr,mbs=a->mbs,bs=a->bs;
957 PetscScalar *x,*from,zero=0.0;
958
959 PetscFunctionBegin;
960 /*
961 PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
962 PetscSynchronizedFlush(PETSC_COMM_WORLD);
963 */
964 /* diagonal part */
965 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
966 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
967
968 /* subdiagonal part */
969 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
970
971 /* copy x into the vec slvec0 */
972 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
973 ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
974 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
975 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
976
977 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
978 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
979 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
980
981 /* superdiagonal part */
982 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
983
984 PetscFunctionReturn(0);
985 }
986
987 #undef __FUNCT__
988 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
989 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
990 {
991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
992 int ierr;
993
994 PetscFunctionBegin;
995 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
996 /* do diagonal part */
997 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
998 /* do superdiagonal part */
999 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1000 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1001
1002 /* do subdiagonal part */
1003 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1005 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1006
1007 PetscFunctionReturn(0);
1008 }
1009
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1012 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1013 {
1014 PetscFunctionBegin;
1015 SETERRQ(1,"Matrix is symmetric.
Call MatMult()."); 1016 /* PetscFunctionReturn(0); */ 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1021 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1022 { 1023 PetscFunctionBegin; 1024 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1025 /* PetscFunctionReturn(0); */ 1026 } 1027 1028 /* 1029 This only works correctly for square matrices where the subblock A->A is the 1030 diagonal block 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1034 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1035 { 1036 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1037 int ierr; 1038 1039 PetscFunctionBegin; 1040 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1041 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatScale_MPISBAIJ" 1047 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 1048 { 1049 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1050 int ierr; 1051 1052 PetscFunctionBegin; 1053 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1054 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1060 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1061 { 1062 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1063 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1064 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1065 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1066 int *cmap,*idx_p,cstart = mat->cstart; 1067 1068 PetscFunctionBegin; 1069 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1070 mat->getrowactive = PETSC_TRUE; 1071 1072 if (!mat->rowvalues && (idx || v)) { 1073 /* 1074 allocate enough space to hold information from the longest row. 
1075 */ 1076 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1077 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1078 int max = 1,mbs = mat->mbs,tmp; 1079 for (i=0; i<mbs; i++) { 1080 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1081 if (max < tmp) { max = tmp; } 1082 } 1083 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1084 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1085 } 1086 1087 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1088 lrow = row - brstart; /* local row index */ 1089 1090 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1091 if (!v) {pvA = 0; pvB = 0;} 1092 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1093 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1094 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1095 nztot = nzA + nzB; 1096 1097 cmap = mat->garray; 1098 if (v || idx) { 1099 if (nztot) { 1100 /* Sort by increasing column numbers, assuming A and B already sorted */ 1101 int imark = -1; 1102 if (v) { 1103 *v = v_p = mat->rowvalues; 1104 for (i=0; i<nzB; i++) { 1105 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1106 else break; 1107 } 1108 imark = i; 1109 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1110 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1111 } 1112 if (idx) { 1113 *idx = idx_p = mat->rowindices; 1114 if (imark > -1) { 1115 for (i=0; i<imark; i++) { 1116 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1117 } 1118 } else { 1119 for (i=0; i<nzB; i++) { 1120 if (cmap[cworkB[i]/bs] < cstart) 1121 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1122 else break; 1123 } 1124 imark = i; 1125 } 1126 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1127 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1128 } 1129 } else { 1130 if (idx) *idx = 0; 1131 if (v) *v = 0; 1132 } 1133 } 1134 *nz = nztot; 1135 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1136 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1142 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1143 { 1144 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1145 1146 PetscFunctionBegin; 1147 if (baij->getrowactive == PETSC_FALSE) { 1148 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1149 } 1150 baij->getrowactive = PETSC_FALSE; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1156 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1157 { 1158 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1159 1160 PetscFunctionBegin; 1161 *bs = baij->bs; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1167 int MatZeroEntries_MPISBAIJ(Mat A) 1168 { 1169 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1170 int ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1174 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1180 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1181 { 1182 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1183 Mat A = a->A,B = a->B; 1184 int ierr; 1185 PetscReal isend[5],irecv[5]; 1186 1187 PetscFunctionBegin; 1188 info->block_size 
= (PetscReal)a->bs; 1189 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1190 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1191 isend[3] = info->memory; isend[4] = info->mallocs; 1192 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1193 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1194 isend[3] += info->memory; isend[4] += info->mallocs; 1195 if (flag == MAT_LOCAL) { 1196 info->nz_used = isend[0]; 1197 info->nz_allocated = isend[1]; 1198 info->nz_unneeded = isend[2]; 1199 info->memory = isend[3]; 1200 info->mallocs = isend[4]; 1201 } else if (flag == MAT_GLOBAL_MAX) { 1202 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1203 info->nz_used = irecv[0]; 1204 info->nz_allocated = irecv[1]; 1205 info->nz_unneeded = irecv[2]; 1206 info->memory = irecv[3]; 1207 info->mallocs = irecv[4]; 1208 } else if (flag == MAT_GLOBAL_SUM) { 1209 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1210 info->nz_used = irecv[0]; 1211 info->nz_allocated = irecv[1]; 1212 info->nz_unneeded = irecv[2]; 1213 info->memory = irecv[3]; 1214 info->mallocs = irecv[4]; 1215 } else { 1216 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1217 } 1218 info->rows_global = (PetscReal)A->M; 1219 info->columns_global = (PetscReal)A->N; 1220 info->rows_local = (PetscReal)A->m; 1221 info->columns_local = (PetscReal)A->N; 1222 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1223 info->fill_ratio_needed = 0; 1224 info->factor_mallocs = 0; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1230 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1231 { 1232 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1233 int ierr; 1234 1235 PetscFunctionBegin; 1236 switch (op) { 1237 case MAT_NO_NEW_NONZERO_LOCATIONS: 1238 case MAT_YES_NEW_NONZERO_LOCATIONS: 1239 case MAT_COLUMNS_UNSORTED: 1240 case MAT_COLUMNS_SORTED: 1241 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1242 case MAT_KEEP_ZEROED_ROWS: 1243 case MAT_NEW_NONZERO_LOCATION_ERR: 1244 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1245 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_ROW_ORIENTED: 1248 a->roworiented = PETSC_TRUE; 1249 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1250 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1251 break; 1252 case MAT_ROWS_SORTED: 1253 case MAT_ROWS_UNSORTED: 1254 case MAT_YES_NEW_DIAGONALS: 1255 case MAT_USE_SINGLE_PRECISION_SOLVES: 1256 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1257 break; 1258 case MAT_COLUMN_ORIENTED: 1259 a->roworiented = PETSC_FALSE; 1260 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1261 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1262 break; 1263 case MAT_IGNORE_OFF_PROC_ENTRIES: 1264 a->donotstash = PETSC_TRUE; 1265 break; 1266 case MAT_NO_NEW_DIAGONALS: 1267 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1268 case MAT_USE_HASH_TABLE: 1269 a->ht_flag = PETSC_TRUE; 1270 break; 1271 default: 1272 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatTranspose_MPISBAIJ(" 1279 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1280 { 1281 PetscFunctionBegin; 1282 SETERRQ(1,"Matrix is symmetric. 
MatTranspose() should not be called"); 1283 /* PetscFunctionReturn(0); */ 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1288 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1289 { 1290 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1291 Mat a = baij->A,b = baij->B; 1292 int ierr,s1,s2,s3; 1293 1294 PetscFunctionBegin; 1295 if (ll != rr) { 1296 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1297 } 1298 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1299 if (rr) { 1300 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1301 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1302 /* Overlap communication with computation. */ 1303 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1304 /*} if (ll) { */ 1305 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1306 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1307 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1308 /* } */ 1309 /* scale the diagonal block */ 1310 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1311 1312 /* if (rr) { */ 1313 /* Do a scatter end and then right scale the off-diagonal block */ 1314 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1315 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1316 } 1317 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1323 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1324 { 1325 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1326 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1327 int *procs,*nprocs,j,idx,nsends,*work,row; 1328 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1329 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1330 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1331 MPI_Comm comm = A->comm; 1332 MPI_Request *send_waits,*recv_waits; 1333 MPI_Status recv_status,*send_status; 1334 IS istmp; 1335 PetscTruth found; 1336 1337 PetscFunctionBegin; 1338 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1339 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1340 1341 /* first count number of contributors to each processor */ 1342 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1343 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1344 procs = nprocs + size; 1345 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1346 for (i=0; i<N; i++) { 1347 idx = rows[i]; 1348 found = PETSC_FALSE; 1349 for (j=0; j<size; j++) { 1350 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1351 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1352 } 1353 } 1354 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1355 } 1356 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1357 1358 /* inform other processors of number of messages and max length*/ 1359 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1360 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1361 nmax = work[rank]; 1362 nrecvs = work[size+rank]; 1363 ierr = PetscFree(work);CHKERRQ(ierr); 1364 1365 /* post receives: */ 1366 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1367 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1368 for (i=0; i<nrecvs; i++) { 1369 ierr = 
MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1370 } 1371 1372 /* do sends: 1373 1) starts[i] gives the starting index in svalues for stuff going to 1374 the ith processor 1375 */ 1376 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1377 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1378 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1379 starts[0] = 0; 1380 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1381 for (i=0; i<N; i++) { 1382 svalues[starts[owner[i]]++] = rows[i]; 1383 } 1384 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1385 1386 starts[0] = 0; 1387 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1388 count = 0; 1389 for (i=0; i<size; i++) { 1390 if (procs[i]) { 1391 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1392 } 1393 } 1394 ierr = PetscFree(starts);CHKERRQ(ierr); 1395 1396 base = owners[rank]*bs; 1397 1398 /* wait on receives */ 1399 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1400 source = lens + nrecvs; 1401 count = nrecvs; slen = 0; 1402 while (count) { 1403 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1404 /* unpack receives into our local space */ 1405 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1406 source[imdex] = recv_status.MPI_SOURCE; 1407 lens[imdex] = n; 1408 slen += n; 1409 count--; 1410 } 1411 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1412 1413 /* move the data into the send scatter */ 1414 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1415 count = 0; 1416 for (i=0; i<nrecvs; i++) { 1417 values = rvalues + i*nmax; 1418 for (j=0; j<lens[i]; j++) { 1419 lrows[count++] = values[j] - base; 1420 } 1421 } 1422 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1423 ierr = PetscFree(lens);CHKERRQ(ierr); 1424 ierr = PetscFree(owner);CHKERRQ(ierr); 1425 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1426 1427 /* actually zap the local rows */ 1428 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1429 PetscLogObjectParent(A,istmp); 1430 1431 /* 1432 Zero the required rows. If the "diagonal block" of the matrix 1433 is square and the user wishes to set the diagonal we use seperate 1434 code so that MatSetValues() is not called for each diagonal allocating 1435 new memory, thus calling lots of mallocs and slowing things down. 
1436 1437 Contributed by: Mathew Knepley 1438 */ 1439 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1440 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1441 if (diag && (l->A->M == l->A->N)) { 1442 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1443 } else if (diag) { 1444 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1445 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1446 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1447 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1448 } 1449 for (i=0; i<slen; i++) { 1450 row = lrows[i] + rstart_bs; 1451 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1452 } 1453 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1454 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 } else { 1456 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1457 } 1458 1459 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1460 ierr = PetscFree(lrows);CHKERRQ(ierr); 1461 1462 /* wait on sends */ 1463 if (nsends) { 1464 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1465 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1466 ierr = PetscFree(send_status);CHKERRQ(ierr); 1467 } 1468 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1469 ierr = PetscFree(svalues);CHKERRQ(ierr); 1470 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1476 int MatPrintHelp_MPISBAIJ(Mat A) 1477 { 1478 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1479 MPI_Comm comm = A->comm; 1480 static int called = 0; 1481 int ierr; 1482 1483 PetscFunctionBegin; 1484 if (!a->rank) { 1485 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1486 } 1487 if (called) {PetscFunctionReturn(0);} else called = 1; 1488 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1489 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1495 int MatSetUnfactored_MPISBAIJ(Mat A) 1496 { 1497 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1498 int ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatEqual_MPISBAIJ" 1509 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1510 { 1511 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1512 Mat a,b,c,d; 1513 PetscTruth flg; 1514 int ierr; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1518 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1519 a = matA->A; b = matA->B; 1520 c = matB->A; d = matB->B; 1521 1522 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1523 if (flg == PETSC_TRUE) { 1524 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1525 } 1526 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1532 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1533 { 1534 int ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = 
MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 /* -------------------------------------------------------------------*/ 1541 static struct _MatOps MatOps_Values = { 1542 MatSetValues_MPISBAIJ, 1543 MatGetRow_MPISBAIJ, 1544 MatRestoreRow_MPISBAIJ, 1545 MatMult_MPISBAIJ, 1546 MatMultAdd_MPISBAIJ, 1547 MatMultTranspose_MPISBAIJ, 1548 MatMultTransposeAdd_MPISBAIJ, 1549 0, 1550 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 MatRelax_MPISBAIJ, 1556 MatTranspose_MPISBAIJ, 1557 MatGetInfo_MPISBAIJ, 1558 MatEqual_MPISBAIJ, 1559 MatGetDiagonal_MPISBAIJ, 1560 MatDiagonalScale_MPISBAIJ, 1561 MatNorm_MPISBAIJ, 1562 MatAssemblyBegin_MPISBAIJ, 1563 MatAssemblyEnd_MPISBAIJ, 1564 0, 1565 MatSetOption_MPISBAIJ, 1566 MatZeroEntries_MPISBAIJ, 1567 MatZeroRows_MPISBAIJ, 1568 0, 1569 0, 1570 0, 1571 0, 1572 MatSetUpPreallocation_MPISBAIJ, 1573 0, 1574 0, 1575 0, 1576 0, 1577 MatDuplicate_MPISBAIJ, 1578 0, 1579 0, 1580 0, 1581 0, 1582 0, 1583 MatGetSubMatrices_MPISBAIJ, 1584 MatIncreaseOverlap_MPISBAIJ, 1585 MatGetValues_MPISBAIJ, 1586 0, 1587 MatPrintHelp_MPISBAIJ, 1588 MatScale_MPISBAIJ, 1589 0, 1590 0, 1591 0, 1592 MatGetBlockSize_MPISBAIJ, 1593 0, 1594 0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 MatSetUnfactored_MPISBAIJ, 1600 0, 1601 MatSetValuesBlocked_MPISBAIJ, 1602 0, 1603 0, 1604 0, 1605 MatGetPetscMaps_Petsc, 1606 0, 1607 0, 1608 0, 1609 0, 1610 0, 1611 0, 1612 MatGetRowMax_MPISBAIJ}; 1613 1614 1615 EXTERN_C_BEGIN 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1618 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1619 { 1620 PetscFunctionBegin; 1621 *a = ((Mat_MPISBAIJ *)A->data)->A; 1622 *iscopy = PETSC_FALSE; 1623 PetscFunctionReturn(0); 1624 } 1625 EXTERN_C_END 1626 1627 EXTERN_C_BEGIN 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatCreate_MPISBAIJ" 1630 int MatCreate_MPISBAIJ(Mat B) 1631 { 1632 Mat_MPISBAIJ *b; 1633 int ierr; 1634 PetscTruth flg; 1635 1636 PetscFunctionBegin; 1637 1638 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1639 B->data = (void*)b; 1640 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1641 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1642 1643 B->ops->destroy = MatDestroy_MPISBAIJ; 1644 B->ops->view = MatView_MPISBAIJ; 1645 B->mapping = 0; 1646 B->factor = 0; 1647 B->assembled = PETSC_FALSE; 1648 1649 B->insertmode = NOT_SET_VALUES; 1650 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1651 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1652 1653 /* build local table of row and column ownerships */ 1654 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1655 b->cowners = b->rowners + b->size + 2; 1656 b->rowners_bs = b->cowners + b->size + 2; 1657 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1658 1659 /* build cache for off array entries formed */ 1660 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1661 b->donotstash = PETSC_FALSE; 1662 b->colmap = PETSC_NULL; 1663 b->garray = PETSC_NULL; 1664 b->roworiented = PETSC_TRUE; 1665 1666 #if defined(PETSC_USE_MAT_SINGLE) 1667 /* stuff for MatSetValues_XXX in single precision */ 1668 b->setvalueslen = 0; 1669 b->setvaluescopy = PETSC_NULL; 1670 #endif 1671 1672 /* stuff used in block assembly */ 1673 b->barray = 0; 1674 1675 /* stuff used for matrix vector multiply */ 1676 b->lvec = 0; 1677 b->Mvctx = 0; 1678 b->slvec0 = 0; 1679 b->slvec0b = 0; 1680 b->slvec1 = 
0;
  b->slvec1a   = 0;
  b->slvec1b   = 0;
  b->sMvctx    = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPISBAIJ",
                                     MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPISBAIJ",
                                     MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPISBAIJ",
                                     MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the
          local submatrix (possibly different for each block row) or PETSC_NULL.
          You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).
Set 1765 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1766 memory allocation. Likewise, specify preallocated storage for the 1767 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1768 1769 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1770 the figure below we depict these three local rows and all columns (0-11). 1771 1772 .vb 1773 0 1 2 3 4 5 6 7 8 9 10 11 1774 ------------------- 1775 row 3 | o o o d d d o o o o o o 1776 row 4 | o o o d d d o o o o o o 1777 row 5 | o o o d d d o o o o o o 1778 ------------------- 1779 .ve 1780 1781 Thus, any entries in the d locations are stored in the d (diagonal) 1782 submatrix, and any entries in the o locations are stored in the 1783 o (off-diagonal) submatrix. Note that the d matrix is stored in 1784 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1785 1786 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1787 plus the diagonal part of the d matrix, 1788 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1789 In general, for PDE problems in which most nonzeros are near the diagonal, 1790 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1791 or you will get TERRIBLE performance; see the users' manual chapter on 1792 matrices. 1793 1794 Level: intermediate 1795 1796 .keywords: matrix, block, aij, compressed row, sparse, parallel 1797 1798 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1799 @*/ 1800 1801 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1802 { 1803 Mat_MPISBAIJ *b; 1804 int ierr,i,mbs,Mbs; 1805 PetscTruth flg2; 1806 1807 PetscFunctionBegin; 1808 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr); 1809 if (!flg2) PetscFunctionReturn(0); 1810 1811 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1812 1813 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1814 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1815 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1816 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1817 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1818 if (d_nnz) { 1819 for (i=0; i<B->m/bs; i++) { 1820 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1821 } 1822 } 1823 if (o_nnz) { 1824 for (i=0; i<B->m/bs; i++) { 1825 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1826 } 1827 } 1828 B->preallocated = PETSC_TRUE; 1829 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1830 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1831 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1832 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1833 1834 b = (Mat_MPISBAIJ*)B->data; 1835 mbs = B->m/bs; 1836 Mbs = B->M/bs; 1837 if (mbs*bs != B->m) { 1838 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1839 } 1840 1841 b->bs = bs; 1842 b->bs2 = bs*bs; 1843 b->mbs = mbs; 1844 b->nbs = mbs; 1845 b->Mbs = Mbs; 1846 b->Nbs = Mbs; 1847 1848 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1849 
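  /* rowners[] holds the block-row ownership ranges: rowners+1 was just filled with
     each process's local block count by the MPI_Allgather above, and the prefix sum
     below turns these counts into offsets, so that process p owns global block rows
     rowners[p] through rowners[p+1]-1 (for example, local mbs of 2, 3 and 2 on three
     processes gives rowners = {0,2,5,7}) */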
b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  b->cstart = b->rstart;
  b->cend   = b->rend;
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;

  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the
          local submatrix (possibly different for each block row) or PETSC_NULL.
          You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1928 1929 Storage Information: 1930 For a square global matrix we define each processor's diagonal portion 1931 to be its local rows and the corresponding columns (a square submatrix); 1932 each processor's off-diagonal portion encompasses the remainder of the 1933 local matrix (a rectangular submatrix). 1934 1935 The user can specify preallocated storage for the diagonal part of 1936 the local submatrix with either d_nz or d_nnz (not both). Set 1937 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1938 memory allocation. Likewise, specify preallocated storage for the 1939 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1940 1941 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1942 the figure below we depict these three local rows and all columns (0-11). 1943 1944 .vb 1945 0 1 2 3 4 5 6 7 8 9 10 11 1946 ------------------- 1947 row 3 | o o o d d d o o o o o o 1948 row 4 | o o o d d d o o o o o o 1949 row 5 | o o o d d d o o o o o o 1950 ------------------- 1951 .ve 1952 1953 Thus, any entries in the d locations are stored in the d (diagonal) 1954 submatrix, and any entries in the o locations are stored in the 1955 o (off-diagonal) submatrix. Note that the d matrix is stored in 1956 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1957 1958 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1959 plus the diagonal part of the d matrix, 1960 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1961 In general, for PDE problems in which most nonzeros are near the diagonal, 1962 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1963 or you will get TERRIBLE performance; see the users' manual chapter on 1964 matrices. 
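   As an illustration only (a sketch, not an actual run): with bs=1, if the three
   depicted rows were the only locally owned rows and every marked location were
   nonzero, and assuming only the upper triangular entries of each row are counted
   for the d block and only the columns to the right of the diagonal block are
   counted for the o block, one could pass

.vb
      int d_nnz[] = {3,2,1};    /* upper triangle of the d block, rows 3,4,5 */
      int o_nnz[] = {6,6,6};    /* columns 6-11 for each of rows 3,4,5       */
.ve

   together with d_nz=0 and o_nz=0.  When unsure, overestimating these counts is
   far cheaper than triggering additional mallocs during assembly.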
1965 1966 Level: intermediate 1967 1968 .keywords: matrix, block, aij, compressed row, sparse, parallel 1969 1970 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1971 @*/ 1972 1973 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1974 { 1975 int ierr,size; 1976 1977 PetscFunctionBegin; 1978 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1979 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1980 if (size > 1) { 1981 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1982 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1983 } else { 1984 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1985 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1993 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1994 { 1995 Mat mat; 1996 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1997 int ierr,len=0; 1998 1999 PetscFunctionBegin; 2000 *newmat = 0; 2001 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2002 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2003 mat->preallocated = PETSC_TRUE; 2004 a = (Mat_MPISBAIJ*)mat->data; 2005 a->bs = oldmat->bs; 2006 a->bs2 = oldmat->bs2; 2007 a->mbs = oldmat->mbs; 2008 a->nbs = oldmat->nbs; 2009 a->Mbs = oldmat->Mbs; 2010 a->Nbs = oldmat->Nbs; 2011 2012 a->rstart = oldmat->rstart; 2013 a->rend = oldmat->rend; 2014 a->cstart = oldmat->cstart; 2015 a->cend = oldmat->cend; 2016 a->size = oldmat->size; 2017 a->rank = oldmat->rank; 2018 a->donotstash = oldmat->donotstash; 2019 a->roworiented = oldmat->roworiented; 2020 a->rowindices = 0; 2021 a->rowvalues = 0; 2022 a->getrowactive = PETSC_FALSE; 2023 a->barray = 0; 2024 a->rstart_bs = oldmat->rstart_bs; 2025 a->rend_bs = oldmat->rend_bs; 2026 a->cstart_bs = oldmat->cstart_bs; 2027 a->cend_bs = oldmat->cend_bs; 2028 2029 /* hash table stuff */ 2030 a->ht = 0; 2031 a->hd = 0; 2032 a->ht_size = 0; 2033 a->ht_flag = oldmat->ht_flag; 2034 a->ht_fact = oldmat->ht_fact; 2035 a->ht_total_ct = 0; 2036 a->ht_insert_ct = 0; 2037 2038 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2039 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2040 a->cowners = a->rowners + a->size + 2; 2041 a->rowners_bs = a->cowners + a->size + 2; 2042 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2043 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2044 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2045 if (oldmat->colmap) { 2046 #if defined (PETSC_USE_CTABLE) 2047 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2048 #else 2049 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2050 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2051 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2052 #endif 2053 } else a->colmap = 0; 2054 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2055 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2056 PetscLogObjectMemory(mat,len*sizeof(int)); 2057 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2058 } else a->garray = 0; 2059 2060 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2061 PetscLogObjectParent(mat,a->lvec); 2062 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2063 2064 PetscLogObjectParent(mat,a->Mvctx); 2065 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2066 PetscLogObjectParent(mat,a->A); 2067 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2068 PetscLogObjectParent(mat,a->B); 2069 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2070 *newmat = mat; 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #include "petscsys.h" 2075 2076 EXTERN_C_BEGIN 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatLoad_MPISBAIJ" 2079 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2080 { 2081 Mat A; 2082 int i,nz,ierr,j,rstart,rend,fd; 2083 PetscScalar *vals,*buf; 2084 MPI_Comm comm = ((PetscObject)viewer)->comm; 2085 MPI_Status status; 2086 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2087 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2088 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2089 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2090 int dcount,kmax,k,nzcount,tmp; 2091 2092 PetscFunctionBegin; 2093 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2094 2095 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2096 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2097 if (!rank) { 2098 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2099 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2100 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2101 if (header[3] < 0) { 2102 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2103 } 2104 } 2105 2106 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2107 M = header[1]; N = header[2]; 2108 2109 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2110 2111 /* 2112 This code adds extra rows to make sure the number of rows is 2113 divisible by the blocksize 2114 */ 2115 Mbs = M/bs; 2116 extra_rows = bs - M + bs*(Mbs); 2117 if (extra_rows == bs) extra_rows = 0; 2118 else Mbs++; 2119 if (extra_rows &&!rank) { 2120 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2121 } 2122 2123 /* determine ownership of all rows */ 2124 mbs = Mbs/size + ((Mbs % size) > rank); 2125 m = mbs*bs; 2126 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2127 browners = rowners + size + 1; 2128 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2129 rowners[0] = 0; 2130 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2131 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2132 rstart = rowners[rank]; 2133 rend = rowners[rank+1]; 2134 2135 /* distribute row lengths to all processors */ 2136 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2137 if (!rank) { 2138 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2139 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2140 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2141 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2142 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2143 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2144 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2145 } else { 
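    /* non-root processes: receive just this process's (rend-rstart)*bs row lengths
       from the root's MPI_Scatterv below */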
2146 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2147 } 2148 2149 if (!rank) { /* procs[0] */ 2150 /* calculate the number of nonzeros on each processor */ 2151 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2152 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2153 for (i=0; i<size; i++) { 2154 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2155 procsnz[i] += rowlengths[j]; 2156 } 2157 } 2158 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2159 2160 /* determine max buffer needed and allocate it */ 2161 maxnz = 0; 2162 for (i=0; i<size; i++) { 2163 maxnz = PetscMax(maxnz,procsnz[i]); 2164 } 2165 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2166 2167 /* read in my part of the matrix column indices */ 2168 nz = procsnz[0]; 2169 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2170 mycols = ibuf; 2171 if (size == 1) nz -= extra_rows; 2172 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2173 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2174 2175 /* read in every ones (except the last) and ship off */ 2176 for (i=1; i<size-1; i++) { 2177 nz = procsnz[i]; 2178 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2179 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2180 } 2181 /* read in the stuff for the last proc */ 2182 if (size != 1) { 2183 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2184 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2185 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2186 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2187 } 2188 ierr = PetscFree(cols);CHKERRQ(ierr); 2189 } else { /* procs[i], i>0 */ 2190 /* determine buffer space needed for message */ 2191 nz = 0; 2192 for (i=0; i<m; i++) { 2193 nz += locrowlens[i]; 2194 } 2195 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2196 mycols = ibuf; 2197 /* receive message of column indices*/ 2198 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2199 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2200 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2201 } 2202 2203 /* loop over local rows, determining number of off diagonal entries */ 2204 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2205 odlens = dlens + (rend-rstart); 2206 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2207 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2208 masked1 = mask + Mbs; 2209 masked2 = masked1 + Mbs; 2210 rowcount = 0; nzcount = 0; 2211 for (i=0; i<mbs; i++) { 2212 dcount = 0; 2213 odcount = 0; 2214 for (j=0; j<bs; j++) { 2215 kmax = locrowlens[rowcount]; 2216 for (k=0; k<kmax; k++) { 2217 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2218 if (!mask[tmp]) { 2219 mask[tmp] = 1; 2220 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2221 else masked1[dcount++] = tmp; /* entry in diag portion */ 2222 } 2223 } 2224 rowcount++; 2225 } 2226 2227 dlens[i] = dcount; /* d_nzz[i] */ 2228 odlens[i] = odcount; /* o_nzz[i] */ 2229 2230 /* zero out the mask elements we set */ 2231 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2232 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2233 } 2234 2235 /* create our matrix */ 2236 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2237 CHKERRQ(ierr); 2238 A = *newmat; 2239 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2240 2241 if (!rank) { 2242 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2243 /* read in my part of the matrix numerical values */ 2244 nz = procsnz[0]; 2245 vals = buf; 2246 mycols = ibuf; 2247 if (size == 1) nz -= extra_rows; 2248 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2249 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2250 2251 /* insert into matrix */ 2252 jj = rstart*bs; 2253 for (i=0; i<m; i++) { 2254 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2255 mycols += locrowlens[i]; 2256 vals += locrowlens[i]; 2257 jj++; 2258 } 2259 2260 /* read in other processors (except the last one) and ship out */ 2261 for (i=1; i<size-1; i++) { 2262 nz = procsnz[i]; 2263 vals = buf; 2264 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2265 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2266 } 2267 /* the last proc */ 2268 if (size != 1){ 2269 nz = procsnz[i] - extra_rows; 2270 vals = buf; 2271 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2272 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2273 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2274 } 2275 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2276 2277 } else { 2278 /* receive numeric values */ 2279 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2280 2281 /* receive message of values*/ 2282 vals = buf; 2283 mycols = ibuf; 2284 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2285 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2286 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2287 2288 /* insert into matrix */ 2289 jj = rstart*bs; 2290 for (i=0; i<m; i++) { 2291 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2292 mycols += locrowlens[i]; 2293 vals += locrowlens[i]; 2294 jj++; 2295 } 2296 } 2297 2298 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2299 ierr = PetscFree(buf);CHKERRQ(ierr); 2300 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2301 ierr = PetscFree(rowners);CHKERRQ(ierr); 2302 ierr = PetscFree(dlens);CHKERRQ(ierr); 2303 ierr = PetscFree(mask);CHKERRQ(ierr); 2304 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 EXTERN_C_END 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2312 /*@ 2313 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2314 2315 Input Parameters: 2316 . mat - the matrix 2317 . 
fact - factor 2318 2319 Collective on Mat 2320 2321 Level: advanced 2322 2323 Notes: 2324 This can also be set by the command line option: -mat_use_hash_table fact 2325 2326 .keywords: matrix, hashtable, factor, HT 2327 2328 .seealso: MatSetOption() 2329 @*/ 2330 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2331 { 2332 PetscFunctionBegin; 2333 SETERRQ(1,"Function not yet written for SBAIJ format"); 2334 /* PetscFunctionReturn(0); */ 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2339 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2340 { 2341 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2342 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2343 PetscReal atmp; 2344 PetscReal *work,*svalues,*rvalues; 2345 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2346 int rank,size,*rowners_bs,dest,count,source; 2347 PetscScalar *va; 2348 MatScalar *ba; 2349 MPI_Status stat; 2350 2351 PetscFunctionBegin; 2352 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2353 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2354 2355 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 2356 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 2357 2358 bs = a->bs; 2359 mbs = a->mbs; 2360 Mbs = a->Mbs; 2361 ba = b->a; 2362 bi = b->i; 2363 bj = b->j; 2364 /* 2365 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2366 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2367 */ 2368 2369 /* find ownerships */ 2370 rowners_bs = a->rowners_bs; 2371 /* 2372 if (!rank){ 2373 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2374 } 2375 */ 2376 2377 /* each proc creates an array to be distributed */ 2378 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2379 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2380 2381 /* row_max for B */ 2382 if (rank != size-1){ 2383 for (i=0; i<mbs; i++) { 2384 ncols = bi[1] - bi[0]; bi++; 2385 brow = bs*i; 2386 for (j=0; j<ncols; j++){ 2387 bcol = bs*(*bj); 2388 for (kcol=0; kcol<bs; kcol++){ 2389 col = bcol + kcol; /* local col index */ 2390 col += rowners_bs[rank+1]; /* global col index */ 2391 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2392 for (krow=0; krow<bs; krow++){ 2393 atmp = PetscAbsScalar(*ba); ba++; 2394 row = brow + krow; /* local row index */ 2395 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2396 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2397 if (work[col] < atmp) work[col] = atmp; 2398 } 2399 } 2400 bj++; 2401 } 2402 } 2403 /* 2404 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2405 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2406 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2407 */ 2408 2409 /* send values to its owners */ 2410 for (dest=rank+1; dest<size; dest++){ 2411 svalues = work + rowners_bs[dest]; 2412 count = rowners_bs[dest+1]-rowners_bs[dest]; 2413 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr); 2414 /* 2415 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2416 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2417 */ 2418 } 2419 } 2420 2421 /* receive values */ 2422 if (rank){ 2423 rvalues = work; 2424 count = rowners_bs[rank+1]-rowners_bs[rank]; 2425 for (source=0; source<rank; source++){ 2426 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr); 2427 /* process values 
*/ 2428 for (i=0; i<count; i++){ 2429 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2430 } 2431 /* 2432 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2433 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2434 */ 2435 } 2436 } 2437 2438 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2439 ierr = PetscFree(work);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 #undef __FUNCT__ 2444 #define __FUNCT__ "MatRelax_MPISBAIJ" 2445 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2446 { 2447 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2448 int ierr,mbs=mat->mbs,bs=mat->bs; 2449 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2450 Vec bb1; 2451 2452 PetscFunctionBegin; 2453 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2454 if (bs > 1) 2455 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2456 2457 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2458 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2459 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2460 its--; 2461 } 2462 2463 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2464 while (its--){ 2465 2466 /* lower triangular part: slvec0b = - B^T*xx */ 2467 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2468 2469 /* copy xx into slvec0a */ 2470 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2471 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2472 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2473 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2474 2475 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2476 2477 /* copy bb into slvec1a */ 2478 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2479 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2480 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2481 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2482 2483 /* set slvec1b = 0 */ 2484 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2485 2486 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2488 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2489 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2490 2491 /* upper triangular part: bb1 = bb1 - B*x */ 2492 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2493 2494 /* local diagonal sweep */ 2495 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2496 } 2497 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2498 } else { 2499 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2506 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2507 { 2508 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2509 int ierr; 2510 PetscScalar mone=-1.0; 2511 Vec lvec1,bb1; 2512 2513 PetscFunctionBegin; 2514 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2515 if (mat->bs > 1) 2516 
SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2517 2518 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2519 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2520 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2521 its--; 2522 } 2523 2524 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2525 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2526 while (its--){ 2527 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2528 2529 /* lower diagonal part: bb1 = bb - B^T*xx */ 2530 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2531 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2532 2533 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2534 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2535 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2536 2537 /* upper diagonal part: bb1 = bb1 - B*x */ 2538 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2539 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2540 2541 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2542 2543 /* diagonal sweep */ 2544 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2545 } 2546 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2547 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2548 } else { 2549 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2550 } 2551 PetscFunctionReturn(0); 2552 } 2553 2554
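/*
    Note: the relaxation routines above are reached through the MatRelax() slot in
    MatOps_Values.  A typical way to exercise them (a sketch, not part of this file)
    is through the SOR preconditioner:

       ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
       ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

    which is the only MatSORType these routines currently support.
*/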