1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 extern int MatUseSpooles_MPISBAIJ(Mat); 24 25 /* UGLY, ugly, ugly 26 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 27 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 28 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 29 converts the entries into single precision and then calls ..._MatScalar() to put them 30 into the single precision data structures. 31 */ 32 #if defined(PETSC_USE_MAT_SINGLE) 33 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 36 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 37 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 38 #else 39 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 40 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 41 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 42 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 43 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 44 #endif 45 46 EXTERN_C_BEGIN 47 #undef __FUNCT__ 48 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 49 int MatStoreValues_MPISBAIJ(Mat mat) 50 { 51 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 52 int ierr; 53 54 PetscFunctionBegin; 55 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 56 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 EXTERN_C_END 60 61 EXTERN_C_BEGIN 62 #undef __FUNCT__ 63 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 64 int MatRetrieveValues_MPISBAIJ(Mat mat) 65 { 66 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 67 int ierr; 68 69 PetscFunctionBegin; 70 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 71 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 EXTERN_C_END 75 76 /* 77 Local utility routine that creates a mapping from the global column 78 number to the local number in the off-diagonal part of the local 79 storage of the matrix. This is done in a non scable way since the 80 length of colmap equals the global matrix length. 81 */ 82 #undef __FUNCT__ 83 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 84 static int CreateColmap_MPISBAIJ_Private(Mat mat) 85 { 86 PetscFunctionBegin; 87 SETERRQ(1,"Function not yet written for SBAIJ format"); 88 /* PetscFunctionReturn(0); */ 89 } 90 91 #define CHUNKSIZE 10 92 93 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 94 { \ 95 \ 96 brow = row/bs; \ 97 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 98 rmax = aimax[brow]; nrow = ailen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto a_noinsert; \ 114 } \ 115 } \ 116 if (a->nonew == 1) goto a_noinsert; \ 117 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 118 if (nrow >= rmax) { \ 119 /* there is no extra room in row, therefore enlarge */ \ 120 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 121 MatScalar *new_a; \ 122 \ 123 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 124 \ 125 /* malloc new storage space */ \ 126 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 127 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 128 new_j = (int*)(new_a + bs2*new_nz); \ 129 new_i = new_j + new_nz; \ 130 \ 131 /* copy over old data into new slots */ \ 132 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 133 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 134 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 135 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 136 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 137 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 139 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 140 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 141 /* free up old matrix storage */ \ 142 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 143 if (!a->singlemalloc) { \ 144 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 145 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 146 } \ 147 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 148 a->singlemalloc = PETSC_TRUE; \ 149 \ 150 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 151 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 152 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 153 a->s_maxnz += bs2*CHUNKSIZE; \ 154 a->reallocs++; \ 155 a->s_nz++; \ 156 } \ 157 N = nrow++ - 1; \ 158 /* shift up all the later entries in this row */ \ 159 for (ii=N; ii>=_i; ii--) { \ 160 rp[ii+1] = rp[ii]; \ 161 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 162 } \ 163 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 164 rp[_i] = bcol; \ 165 ap[bs2*_i + bs*cidx + ridx] = value; \ 166 a_noinsert:; \ 167 ailen[brow] = nrow; \ 168 } 169 #ifndef MatSetValues_SeqBAIJ_B_Private 170 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 171 { \ 172 brow = row/bs; \ 173 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 174 rmax = bimax[brow]; nrow = bilen[brow]; \ 175 bcol = col/bs; \ 176 ridx = row % bs; cidx = col % bs; \ 177 low = 0; high = nrow; \ 178 while (high-low > 3) { \ 179 t = (low+high)/2; \ 180 if (rp[t] > bcol) high = t; \ 181 else low = t; \ 182 } \ 183 for (_i=low; _i<high; _i++) { \ 184 if (rp[_i] > bcol) break; \ 185 if (rp[_i] == bcol) { \ 186 bap = ap + bs2*_i + bs*cidx + ridx; \ 187 if (addv == ADD_VALUES) *bap += value; \ 188 else *bap = value; \ 189 goto b_noinsert; \ 190 } \ 191 } \ 192 if (b->nonew == 1) goto b_noinsert; \ 193 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 194 if (nrow >= rmax) { \ 195 /* there is no extra room in row, therefore enlarge */ \ 196 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 197 MatScalar *new_a; \ 198 \ 199 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 200 \ 201 /* malloc new storage space */ \ 202 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 203 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 204 new_j = (int*)(new_a + bs2*new_nz); \ 205 new_i = new_j + new_nz; \ 206 \ 207 /* copy over old data into new slots */ \ 208 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 209 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 210 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 211 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 212 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 213 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 214 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 215 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 216 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 217 /* free up old matrix storage */ \ 218 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 219 if (!b->singlemalloc) { \ 220 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 221 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 222 } \ 223 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 224 b->singlemalloc = PETSC_TRUE; \ 225 \ 226 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 227 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 228 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 229 b->maxnz += bs2*CHUNKSIZE; \ 230 b->reallocs++; \ 231 b->nz++; \ 232 } \ 233 N = nrow++ - 1; \ 234 /* shift up all the later entries in this row */ \ 235 for (ii=N; ii>=_i; ii--) { \ 236 rp[ii+1] = rp[ii]; \ 237 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 238 } \ 239 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 240 rp[_i] = bcol; \ 241 ap[bs2*_i + bs*cidx + ridx] = value; \ 242 b_noinsert:; \ 243 bilen[brow] = nrow; \ 244 } 245 #endif 246 247 #if defined(PETSC_USE_MAT_SINGLE) 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatSetValues_MPISBAIJ" 250 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 251 { 252 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 253 int ierr,i,N = m*n; 254 MatScalar *vsingle; 255 256 PetscFunctionBegin; 257 if (N > b->setvalueslen) { 258 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 259 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 260 b->setvalueslen = N; 261 } 262 vsingle = b->setvaluescopy; 263 264 for (i=0; i<N; i++) { 265 vsingle[i] = v[i]; 266 } 267 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 273 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 274 { 275 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 276 int ierr,i,N = m*n*b->bs2; 277 MatScalar *vsingle; 278 279 PetscFunctionBegin; 280 if (N > b->setvalueslen) { 281 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 282 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 283 b->setvalueslen = N; 284 } 285 vsingle = b->setvaluescopy; 286 for (i=0; i<N; i++) { 287 vsingle[i] = v[i]; 288 } 289 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 295 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 296 { 297 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 298 int ierr,i,N = m*n; 299 MatScalar *vsingle; 300 301 PetscFunctionBegin; 302 SETERRQ(1,"Function not yet written for SBAIJ format"); 303 /* PetscFunctionReturn(0); */ 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 308 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 int ierr,i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 SETERRQ(1,"Function not yet written for SBAIJ format"); 316 /* PetscFunctionReturn(0); */ 317 } 318 #endif 319 320 /* Only add/insert a(i,j) with i<=j (blocks). 321 Any a(i,j) with i>j input by user is ingored. 322 */ 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatSetValues_MPIBAIJ" 325 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 326 { 327 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 328 MatScalar value; 329 PetscTruth roworiented = baij->roworiented; 330 int ierr,i,j,row,col; 331 int rstart_orig=baij->rstart_bs; 332 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 333 int cend_orig=baij->cend_bs,bs=baij->bs; 334 335 /* Some Variables required in the macro */ 336 Mat A = baij->A; 337 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 338 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 339 MatScalar *aa=a->a; 340 341 Mat B = baij->B; 342 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 343 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 344 MatScalar *ba=b->a; 345 346 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 347 int low,high,t,ridx,cidx,bs2=a->bs2; 348 MatScalar *ap,*bap; 349 350 /* for stash */ 351 int n_loc, *in_loc=0; 352 MatScalar *v_loc=0; 353 354 PetscFunctionBegin; 355 356 if(!baij->donotstash){ 357 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 358 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 359 } 360 361 for (i=0; i<m; i++) { 362 if (im[i] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 365 #endif 366 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 367 row = im[i] - rstart_orig; /* local row index */ 368 for (j=0; j<n; j++) { 369 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 370 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 371 col = in[j] - cstart_orig; /* local col index */ 372 brow = row/bs; bcol = col/bs; 373 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 374 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 375 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 376 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 377 } else if (in[j] < 0) continue; 378 #if defined(PETSC_USE_BOPT_g) 379 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 380 #endif 381 else { /* off-diag entry (B) */ 382 if (mat->was_assembled) { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 388 col = col - 1; 389 #else 390 col = baij->colmap[in[j]/bs] - 1; 391 #endif 392 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 393 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 394 col = in[j]; 395 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 396 B = baij->B; 397 b = (Mat_SeqBAIJ*)(B)->data; 398 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 399 ba=b->a; 400 } else col += in[j]%bs; 401 } else col = in[j]; 402 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 403 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 404 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 405 } 406 } 407 } else { /* off processor entry */ 408 if (!baij->donotstash) { 409 n_loc = 0; 410 for (j=0; j<n; j++){ 411 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 412 in_loc[n_loc] = in[j]; 413 if (roworiented) { 414 v_loc[n_loc] = v[i*n+j]; 415 } else { 416 v_loc[n_loc] = v[j*m+i]; 417 } 418 n_loc++; 419 } 420 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 421 } 422 } 423 } 424 425 if(!baij->donotstash){ 426 ierr = PetscFree(in_loc);CHKERRQ(ierr); 427 ierr = PetscFree(v_loc);CHKERRQ(ierr); 428 } 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 434 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 435 { 436 PetscFunctionBegin; 437 SETERRQ(1,"Function not yet written for SBAIJ format"); 438 /* PetscFunctionReturn(0); */ 439 } 440 441 #define HASH_KEY 0.6180339887 442 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 443 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 444 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 447 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 448 { 449 PetscFunctionBegin; 450 SETERRQ(1,"Function not yet written for SBAIJ format"); 451 /* PetscFunctionReturn(0); */ 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 456 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 457 { 458 PetscFunctionBegin; 459 SETERRQ(1,"Function not yet written for SBAIJ format"); 460 /* PetscFunctionReturn(0); */ 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatGetValues_MPISBAIJ" 465 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 466 { 467 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 468 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 469 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 470 471 PetscFunctionBegin; 472 for (i=0; i<m; i++) { 473 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 474 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 475 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 476 row = idxm[i] - bsrstart; 477 for (j=0; j<n; j++) { 478 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 479 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 480 if (idxn[j] >= bscstart && idxn[j] < bscend){ 481 col = idxn[j] - bscstart; 482 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 483 } else { 484 if (!baij->colmap) { 485 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 486 } 487 #if defined (PETSC_USE_CTABLE) 488 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 489 data --; 490 #else 491 data = baij->colmap[idxn[j]/bs]-1; 492 #endif 493 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 494 else { 495 col = data + idxn[j]%bs; 496 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 497 } 498 } 499 } 500 } else { 501 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 502 } 503 } 504 PetscFunctionReturn(0); 505 } 506 507 #undef __FUNCT__ 508 #define __FUNCT__ "MatNorm_MPISBAIJ" 509 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 510 { 511 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 512 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 513 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 514 int ierr; 515 PetscReal sum[2],*lnorm2; 516 517 PetscFunctionBegin; 518 if (baij->size == 1) { 519 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 520 } else { 521 if (type == NORM_FROBENIUS) { 522 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 523 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 524 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 525 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 526 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 527 /* 528 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 529 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 530 */ 531 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 532 /* 533 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 534 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 535 536 *norm = sqrt(sum[0] + 2*sum[1]); 537 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 538 } else { 539 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 540 } 541 } 542 PetscFunctionReturn(0); 543 } 544 545 /* 546 Creates the hash table, and sets the table 547 This table is created only once. 548 If new entried need to be added to the matrix 549 then the hash table has to be destroyed and 550 recreated. 551 */ 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 554 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 555 { 556 PetscFunctionBegin; 557 SETERRQ(1,"Function not yet written for SBAIJ format"); 558 /* PetscFunctionReturn(0); */ 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 563 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 564 { 565 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 566 int ierr,nstash,reallocs; 567 InsertMode addv; 568 569 PetscFunctionBegin; 570 if (baij->donotstash) { 571 PetscFunctionReturn(0); 572 } 573 574 /* make sure all processors are either in INSERTMODE or ADDMODE */ 575 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 576 if (addv == (ADD_VALUES|INSERT_VALUES)) { 577 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 578 } 579 mat->insertmode = addv; /* in case this processor had no cache */ 580 581 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 582 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 583 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 584 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 585 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 586 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 592 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 593 { 594 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 595 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 596 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 597 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 598 int *row,*col,other_disassembled; 599 PetscTruth r1,r2,r3; 600 MatScalar *val; 601 InsertMode addv = mat->insertmode; 602 #if defined(PETSC_HAVE_SPOOLES) 603 PetscTruth flag; 604 #endif 605 606 PetscFunctionBegin; 607 608 if (!baij->donotstash) { 609 while (1) { 610 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 611 /* 612 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 613 PetscSynchronizedFlush(PETSC_COMM_WORLD); 614 */ 615 if (!flg) break; 616 617 for (i=0; i<n;) { 618 /* Now identify the consecutive vals belonging to the same row */ 619 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 620 if (j < n) ncols = j-i; 621 else ncols = n-i; 622 /* Now assemble all these values with a single function call */ 623 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 624 i = j; 625 } 626 } 627 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 628 /* Now process the block-stash. Since the values are stashed column-oriented, 629 set the roworiented flag to column oriented, and after MatSetValues() 630 restore the original flags */ 631 r1 = baij->roworiented; 632 r2 = a->roworiented; 633 r3 = b->roworiented; 634 baij->roworiented = PETSC_FALSE; 635 a->roworiented = PETSC_FALSE; 636 b->roworiented = PETSC_FALSE; 637 while (1) { 638 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 639 if (!flg) break; 640 641 for (i=0; i<n;) { 642 /* Now identify the consecutive vals belonging to the same row */ 643 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 644 if (j < n) ncols = j-i; 645 else ncols = n-i; 646 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 647 i = j; 648 } 649 } 650 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 651 baij->roworiented = r1; 652 a->roworiented = r2; 653 b->roworiented = r3; 654 } 655 656 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 657 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 658 659 /* determine if any processor has disassembled, if so we must 660 also disassemble ourselfs, in order that we may reassemble. */ 661 /* 662 if nonzero structure of submatrix B cannot change then we know that 663 no processor disassembled thus we can skip this stuff 664 */ 665 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 666 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 667 if (mat->was_assembled && !other_disassembled) { 668 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 669 } 670 } 671 672 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 673 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 674 } 675 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 676 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 677 678 #if defined(PETSC_USE_BOPT_g) 679 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 680 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 681 baij->ht_total_ct = 0; 682 baij->ht_insert_ct = 0; 683 } 684 #endif 685 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 686 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 687 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 688 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 689 } 690 691 if (baij->rowvalues) { 692 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 693 baij->rowvalues = 0; 694 } 695 696 #if defined(PETSC_HAVE_SPOOLES) 697 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 698 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 699 #endif 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 705 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 706 { 707 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 708 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 709 PetscTruth isascii,isdraw; 710 PetscViewer sviewer; 711 PetscViewerFormat format; 712 713 PetscFunctionBegin; 714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 716 if (isascii) { 717 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 718 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 719 MatInfo info; 720 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 721 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 722 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 723 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 724 baij->bs,(int)info.memory);CHKERRQ(ierr); 725 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 726 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 727 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 728 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 729 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 730 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } else if (format == PETSC_VIEWER_ASCII_INFO) { 733 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 } 737 738 if (isdraw) { 739 PetscDraw draw; 740 PetscTruth isnull; 741 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 742 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 743 } 744 745 if (size == 1) { 746 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 747 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 748 } else { 749 /* assemble the entire matrix onto first processor. */ 750 Mat A; 751 Mat_SeqSBAIJ *Aloc; 752 Mat_SeqBAIJ *Bloc; 753 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 754 MatScalar *a; 755 756 if (!rank) { 757 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 758 } else { 759 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 760 } 761 PetscLogObjectParent(mat,A); 762 763 /* copy over the A part */ 764 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 765 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 766 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 767 768 for (i=0; i<mbs; i++) { 769 rvals[0] = bs*(baij->rstart + i); 770 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 771 for (j=ai[i]; j<ai[i+1]; j++) { 772 col = (baij->cstart+aj[j])*bs; 773 for (k=0; k<bs; k++) { 774 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 775 col++; a += bs; 776 } 777 } 778 } 779 /* copy over the B part */ 780 Bloc = (Mat_SeqBAIJ*)baij->B->data; 781 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 782 for (i=0; i<mbs; i++) { 783 rvals[0] = bs*(baij->rstart + i); 784 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 785 for (j=ai[i]; j<ai[i+1]; j++) { 786 col = baij->garray[aj[j]]*bs; 787 for (k=0; k<bs; k++) { 788 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 789 col++; a += bs; 790 } 791 } 792 } 793 ierr = PetscFree(rvals);CHKERRQ(ierr); 794 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 795 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 796 /* 797 Everyone has to call to draw the matrix since the graphics waits are 798 synchronized across all processors that share the PetscDraw object 799 */ 800 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 801 if (!rank) { 802 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 803 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 804 } 805 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 806 ierr = MatDestroy(A);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatView_MPISBAIJ" 813 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 814 { 815 int ierr; 816 PetscTruth isascii,isdraw,issocket,isbinary; 817 818 PetscFunctionBegin; 819 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 820 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 821 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 822 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 823 if (isascii || isdraw || issocket || isbinary) { 824 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 825 } else { 826 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 827 } 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatDestroy_MPISBAIJ" 833 int MatDestroy_MPISBAIJ(Mat mat) 834 { 835 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 836 int ierr; 837 838 PetscFunctionBegin; 839 #if defined(PETSC_USE_LOG) 840 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 841 #endif 842 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 843 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 844 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 845 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 846 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 847 #if defined (PETSC_USE_CTABLE) 848 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 849 #else 850 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 851 #endif 852 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 853 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 854 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 855 if (baij->slvec0) { 856 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 857 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 858 } 859 if (baij->slvec1) { 860 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 861 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 862 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 863 } 864 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 865 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 866 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 867 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 868 #if defined(PETSC_USE_MAT_SINGLE) 869 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 870 #endif 871 ierr = PetscFree(baij);CHKERRQ(ierr); 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "MatMult_MPISBAIJ" 877 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 878 { 879 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 880 int ierr,nt,mbs=a->mbs,bs=a->bs; 881 PetscScalar *x,*from,zero=0.0; 882 883 PetscFunctionBegin; 884 /* 885 PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n"); 886 PetscSynchronizedFlush(PETSC_COMM_WORLD); 887 */ 888 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 889 if (nt != A->n) { 890 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 891 } 892 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 893 if (nt != A->m) { 894 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 895 } 896 897 /* diagonal part */ 898 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 899 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 900 901 /* subdiagonal part */ 902 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 903 904 /* copy x into the vec slvec0 */ 905 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 907 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 908 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 909 910 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 911 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 912 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 913 914 /* supperdiagonal part */ 915 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 916 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 922 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 923 { 924 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 925 int ierr,nt; 926 927 PetscFunctionBegin; 928 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 929 if (nt != A->n) { 930 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 931 } 932 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 933 if (nt != A->m) { 934 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 935 } 936 937 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 938 /* do diagonal part */ 939 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 940 /* do supperdiagonal part */ 941 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 942 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 943 /* do subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 945 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 946 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 947 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 953 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 954 { 955 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 956 int ierr,mbs=a->mbs,bs=a->bs; 957 PetscScalar *x,*from,zero=0.0; 958 959 PetscFunctionBegin; 960 /* 961 PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n"); 962 PetscSynchronizedFlush(PETSC_COMM_WORLD); 963 */ 964 /* diagonal part */ 965 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 966 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 967 968 /* subdiagonal part */ 969 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 970 971 /* copy x into the vec slvec0 */ 972 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 973 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 974 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 975 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 976 977 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 978 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 979 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 980 981 /* supperdiagonal part */ 982 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 983 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 989 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 990 { 991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 992 int ierr; 993 994 PetscFunctionBegin; 995 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 996 /* do diagonal part */ 997 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 998 /* do supperdiagonal part */ 999 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1000 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1001 1002 /* do subdiagonal part */ 1003 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1006 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1012 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1013 { 1014 PetscFunctionBegin; 1015 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1016 /* PetscFunctionReturn(0); */ 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1021 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1022 { 1023 PetscFunctionBegin; 1024 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1025 /* PetscFunctionReturn(0); */ 1026 } 1027 1028 /* 1029 This only works correctly for square matrices where the subblock A->A is the 1030 diagonal block 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1034 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1035 { 1036 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1037 int ierr; 1038 1039 PetscFunctionBegin; 1040 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1041 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatScale_MPISBAIJ" 1047 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 1048 { 1049 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1050 int ierr; 1051 1052 PetscFunctionBegin; 1053 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1054 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1060 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1061 { 1062 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1063 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1064 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1065 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1066 int *cmap,*idx_p,cstart = mat->cstart; 1067 1068 PetscFunctionBegin; 1069 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1070 mat->getrowactive = PETSC_TRUE; 1071 1072 if (!mat->rowvalues && (idx || v)) { 1073 /* 1074 allocate enough space to hold information from the longest row. 1075 */ 1076 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1077 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1078 int max = 1,mbs = mat->mbs,tmp; 1079 for (i=0; i<mbs; i++) { 1080 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1081 if (max < tmp) { max = tmp; } 1082 } 1083 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1084 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1085 } 1086 1087 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1088 lrow = row - brstart; /* local row index */ 1089 1090 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1091 if (!v) {pvA = 0; pvB = 0;} 1092 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1093 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1094 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1095 nztot = nzA + nzB; 1096 1097 cmap = mat->garray; 1098 if (v || idx) { 1099 if (nztot) { 1100 /* Sort by increasing column numbers, assuming A and B already sorted */ 1101 int imark = -1; 1102 if (v) { 1103 *v = v_p = mat->rowvalues; 1104 for (i=0; i<nzB; i++) { 1105 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1106 else break; 1107 } 1108 imark = i; 1109 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1110 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1111 } 1112 if (idx) { 1113 *idx = idx_p = mat->rowindices; 1114 if (imark > -1) { 1115 for (i=0; i<imark; i++) { 1116 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1117 } 1118 } else { 1119 for (i=0; i<nzB; i++) { 1120 if (cmap[cworkB[i]/bs] < cstart) 1121 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1122 else break; 1123 } 1124 imark = i; 1125 } 1126 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1127 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1128 } 1129 } else { 1130 if (idx) *idx = 0; 1131 if (v) *v = 0; 1132 } 1133 } 1134 *nz = nztot; 1135 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1136 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1142 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1143 { 1144 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1145 1146 PetscFunctionBegin; 1147 if (baij->getrowactive == PETSC_FALSE) { 1148 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1149 } 1150 baij->getrowactive = PETSC_FALSE; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1156 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1157 { 1158 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1159 1160 PetscFunctionBegin; 1161 *bs = baij->bs; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1167 int MatZeroEntries_MPISBAIJ(Mat A) 1168 { 1169 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1170 int ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1174 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1180 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1181 { 1182 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1183 Mat A = a->A,B = a->B; 1184 int ierr; 1185 PetscReal isend[5],irecv[5]; 1186 1187 PetscFunctionBegin; 1188 info->block_size = (PetscReal)a->bs; 1189 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1190 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1191 isend[3] = info->memory; isend[4] = info->mallocs; 1192 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1193 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1194 isend[3] += info->memory; isend[4] += info->mallocs; 1195 if (flag == MAT_LOCAL) { 1196 info->nz_used = isend[0]; 1197 info->nz_allocated = isend[1]; 1198 info->nz_unneeded = isend[2]; 1199 info->memory = isend[3]; 1200 info->mallocs = isend[4]; 1201 } else if (flag == MAT_GLOBAL_MAX) { 1202 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1203 info->nz_used = irecv[0]; 1204 info->nz_allocated = irecv[1]; 1205 info->nz_unneeded = irecv[2]; 1206 info->memory = irecv[3]; 1207 info->mallocs = irecv[4]; 1208 } else if (flag == MAT_GLOBAL_SUM) { 1209 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1210 info->nz_used = irecv[0]; 1211 info->nz_allocated = irecv[1]; 1212 info->nz_unneeded = irecv[2]; 1213 info->memory = irecv[3]; 1214 info->mallocs = irecv[4]; 1215 } else { 1216 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1217 } 1218 info->rows_global = (PetscReal)A->M; 1219 info->columns_global = (PetscReal)A->N; 1220 info->rows_local = (PetscReal)A->m; 1221 info->columns_local = (PetscReal)A->N; 1222 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1223 info->fill_ratio_needed = 0; 1224 info->factor_mallocs = 0; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1230 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1231 { 1232 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1233 int ierr; 1234 1235 PetscFunctionBegin; 1236 switch (op) { 1237 case MAT_NO_NEW_NONZERO_LOCATIONS: 1238 case MAT_YES_NEW_NONZERO_LOCATIONS: 1239 case MAT_COLUMNS_UNSORTED: 1240 case MAT_COLUMNS_SORTED: 1241 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1242 case MAT_KEEP_ZEROED_ROWS: 1243 case MAT_NEW_NONZERO_LOCATION_ERR: 1244 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1245 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_ROW_ORIENTED: 1248 a->roworiented = PETSC_TRUE; 1249 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1250 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1251 break; 1252 case MAT_ROWS_SORTED: 1253 case MAT_ROWS_UNSORTED: 1254 case MAT_YES_NEW_DIAGONALS: 1255 case MAT_USE_SINGLE_PRECISION_SOLVES: 1256 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1257 break; 1258 case MAT_COLUMN_ORIENTED: 1259 a->roworiented = PETSC_FALSE; 1260 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1261 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1262 break; 1263 case MAT_IGNORE_OFF_PROC_ENTRIES: 1264 a->donotstash = PETSC_TRUE; 1265 break; 1266 case MAT_NO_NEW_DIAGONALS: 1267 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1268 case MAT_USE_HASH_TABLE: 1269 a->ht_flag = PETSC_TRUE; 1270 break; 1271 default: 1272 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1279 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1280 { 1281 int ierr; 1282 PetscFunctionBegin; 1283 ierr = MatDuplicate(A,MAT_COPY_VALUES,B); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1289 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1290 { 1291 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1292 Mat a = baij->A,b = baij->B; 1293 int ierr,s1,s2,s3; 1294 1295 PetscFunctionBegin; 1296 if (ll != rr) { 1297 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1298 } 1299 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1300 if (rr) { 1301 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1302 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1303 /* Overlap communication with computation. */ 1304 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1305 /*} if (ll) { */ 1306 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1307 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1308 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1309 /* } */ 1310 /* scale the diagonal block */ 1311 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1312 1313 /* if (rr) { */ 1314 /* Do a scatter end and then right scale the off-diagonal block */ 1315 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1316 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1317 } 1318 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1324 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1325 { 1326 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1327 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1328 int *procs,*nprocs,j,idx,nsends,*work,row; 1329 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1330 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1331 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1332 MPI_Comm comm = A->comm; 1333 MPI_Request *send_waits,*recv_waits; 1334 MPI_Status recv_status,*send_status; 1335 IS istmp; 1336 PetscTruth found; 1337 1338 PetscFunctionBegin; 1339 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1340 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1341 1342 /* first count number of contributors to each processor */ 1343 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1344 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1345 procs = nprocs + size; 1346 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1347 for (i=0; i<N; i++) { 1348 idx = rows[i]; 1349 found = PETSC_FALSE; 1350 for (j=0; j<size; j++) { 1351 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1352 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1353 } 1354 } 1355 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1356 } 1357 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1358 1359 /* inform other processors of number of messages and max length*/ 1360 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1361 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1362 nmax = work[rank]; 1363 nrecvs = work[size+rank]; 1364 ierr = PetscFree(work);CHKERRQ(ierr); 1365 1366 /* post receives: */ 1367 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1368 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1369 for (i=0; i<nrecvs; i++) { 1370 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1371 } 1372 1373 /* do sends: 1374 1) starts[i] gives the starting index in svalues for stuff going to 1375 the ith processor 1376 */ 1377 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1378 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1379 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1380 starts[0] = 0; 1381 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1382 for (i=0; i<N; i++) { 1383 svalues[starts[owner[i]]++] = rows[i]; 1384 } 1385 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1386 1387 starts[0] = 0; 1388 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1389 count = 0; 1390 for (i=0; i<size; i++) { 1391 if (procs[i]) { 1392 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1393 } 1394 } 1395 ierr = PetscFree(starts);CHKERRQ(ierr); 1396 1397 base = owners[rank]*bs; 1398 1399 /* wait on receives */ 1400 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1401 source = lens + nrecvs; 1402 count = nrecvs; slen = 0; 1403 while (count) { 1404 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1405 /* unpack receives into our local space */ 1406 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1407 source[imdex] = recv_status.MPI_SOURCE; 1408 lens[imdex] = n; 1409 slen += n; 1410 count--; 1411 } 1412 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1413 1414 /* move the data into the send scatter */ 1415 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1416 count = 0; 1417 for (i=0; i<nrecvs; i++) { 1418 values = rvalues + i*nmax; 1419 for (j=0; j<lens[i]; j++) { 1420 lrows[count++] = values[j] - base; 1421 } 1422 } 1423 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1424 ierr = PetscFree(lens);CHKERRQ(ierr); 1425 ierr = PetscFree(owner);CHKERRQ(ierr); 1426 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1427 1428 /* actually zap the local rows */ 1429 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1430 PetscLogObjectParent(A,istmp); 1431 1432 /* 1433 Zero the required rows. If the "diagonal block" of the matrix 1434 is square and the user wishes to set the diagonal we use seperate 1435 code so that MatSetValues() is not called for each diagonal allocating 1436 new memory, thus calling lots of mallocs and slowing things down. 1437 1438 Contributed by: Mathew Knepley 1439 */ 1440 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1441 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1442 if (diag && (l->A->M == l->A->N)) { 1443 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1444 } else if (diag) { 1445 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1446 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1447 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1448 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1449 } 1450 for (i=0; i<slen; i++) { 1451 row = lrows[i] + rstart_bs; 1452 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1453 } 1454 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1456 } else { 1457 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1458 } 1459 1460 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1461 ierr = PetscFree(lrows);CHKERRQ(ierr); 1462 1463 /* wait on sends */ 1464 if (nsends) { 1465 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1466 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1467 ierr = PetscFree(send_status);CHKERRQ(ierr); 1468 } 1469 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1470 ierr = PetscFree(svalues);CHKERRQ(ierr); 1471 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1477 int MatPrintHelp_MPISBAIJ(Mat A) 1478 { 1479 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1480 MPI_Comm comm = A->comm; 1481 static int called = 0; 1482 int ierr; 1483 1484 PetscFunctionBegin; 1485 if (!a->rank) { 1486 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1487 } 1488 if (called) {PetscFunctionReturn(0);} else called = 1; 1489 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1490 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1496 int MatSetUnfactored_MPISBAIJ(Mat A) 1497 { 1498 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1499 int ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatEqual_MPISBAIJ" 1510 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1511 { 1512 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1513 Mat a,b,c,d; 1514 PetscTruth flg; 1515 int ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1519 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1520 a = matA->A; b = matA->B; 1521 c = matB->A; d = matB->B; 1522 1523 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1524 if (flg == PETSC_TRUE) { 1525 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1526 } 1527 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1533 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1534 { 1535 int ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 /* -------------------------------------------------------------------*/ 1542 static struct _MatOps MatOps_Values = { 1543 MatSetValues_MPISBAIJ, 1544 MatGetRow_MPISBAIJ, 1545 MatRestoreRow_MPISBAIJ, 1546 MatMult_MPISBAIJ, 1547 MatMultAdd_MPISBAIJ, 1548 MatMultTranspose_MPISBAIJ, 1549 MatMultTransposeAdd_MPISBAIJ, 1550 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 MatRelax_MPISBAIJ, 1557 MatTranspose_MPISBAIJ, 1558 MatGetInfo_MPISBAIJ, 1559 MatEqual_MPISBAIJ, 1560 MatGetDiagonal_MPISBAIJ, 1561 MatDiagonalScale_MPISBAIJ, 1562 MatNorm_MPISBAIJ, 1563 MatAssemblyBegin_MPISBAIJ, 1564 MatAssemblyEnd_MPISBAIJ, 1565 0, 1566 MatSetOption_MPISBAIJ, 1567 MatZeroEntries_MPISBAIJ, 1568 MatZeroRows_MPISBAIJ, 1569 0, 1570 0, 1571 0, 1572 0, 1573 MatSetUpPreallocation_MPISBAIJ, 1574 0, 1575 0, 1576 0, 1577 0, 1578 MatDuplicate_MPISBAIJ, 1579 0, 1580 0, 1581 0, 1582 0, 1583 0, 1584 MatGetSubMatrices_MPISBAIJ, 1585 MatIncreaseOverlap_MPISBAIJ, 1586 MatGetValues_MPISBAIJ, 1587 0, 1588 MatPrintHelp_MPISBAIJ, 1589 MatScale_MPISBAIJ, 1590 0, 1591 0, 1592 0, 1593 MatGetBlockSize_MPISBAIJ, 1594 0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 0, 1600 MatSetUnfactored_MPISBAIJ, 1601 0, 1602 MatSetValuesBlocked_MPISBAIJ, 1603 0, 1604 0, 1605 0, 1606 MatGetPetscMaps_Petsc, 1607 0, 1608 0, 1609 0, 1610 0, 1611 0, 1612 0, 1613 MatGetRowMax_MPISBAIJ}; 1614 1615 1616 EXTERN_C_BEGIN 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1619 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1620 { 1621 PetscFunctionBegin; 1622 *a = ((Mat_MPISBAIJ *)A->data)->A; 1623 *iscopy = PETSC_FALSE; 1624 PetscFunctionReturn(0); 1625 } 1626 EXTERN_C_END 1627 1628 EXTERN_C_BEGIN 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatCreate_MPISBAIJ" 1631 int MatCreate_MPISBAIJ(Mat B) 1632 { 1633 Mat_MPISBAIJ *b; 1634 int ierr; 1635 PetscTruth flg; 1636 1637 PetscFunctionBegin; 1638 1639 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1640 B->data = (void*)b; 1641 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1642 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1643 1644 B->ops->destroy = MatDestroy_MPISBAIJ; 1645 B->ops->view = MatView_MPISBAIJ; 1646 B->mapping = 0; 1647 B->factor = 0; 1648 B->assembled = PETSC_FALSE; 1649 1650 B->insertmode = NOT_SET_VALUES; 1651 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1652 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1653 1654 /* build local table of row and column ownerships */ 1655 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1656 b->cowners = b->rowners + b->size + 2; 1657 b->rowners_bs = b->cowners + b->size + 2; 1658 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1659 1660 /* build cache for off array entries formed */ 1661 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1662 b->donotstash = PETSC_FALSE; 1663 b->colmap = PETSC_NULL; 1664 b->garray = PETSC_NULL; 1665 b->roworiented = PETSC_TRUE; 1666 1667 #if defined(PETSC_USE_MAT_SINGLE) 1668 /* stuff for MatSetValues_XXX in single precision */ 1669 b->setvalueslen = 0; 1670 b->setvaluescopy = PETSC_NULL; 1671 #endif 1672 1673 /* stuff used in block assembly */ 1674 b->barray = 0; 1675 1676 /* stuff used for matrix vector multiply */ 1677 b->lvec = 0; 1678 b->Mvctx = 0; 1679 b->slvec0 = 0; 1680 b->slvec0b = 0; 1681 b->slvec1 = 0; 1682 b->slvec1a = 0; 1683 b->slvec1b = 0; 1684 b->sMvctx = 0; 1685 1686 /* stuff for MatGetRow() */ 1687 b->rowindices = 0; 1688 b->rowvalues = 0; 1689 b->getrowactive = PETSC_FALSE; 1690 1691 /* hash table stuff */ 1692 b->ht = 0; 1693 b->hd = 0; 1694 b->ht_size = 0; 1695 b->ht_flag = PETSC_FALSE; 1696 b->ht_fact = 0; 1697 b->ht_total_ct = 0; 1698 b->ht_insert_ct = 0; 1699 1700 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1701 if (flg) { 1702 PetscReal fact = 1.39; 1703 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1704 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1705 if (fact <= 1.0) fact = 1.39; 1706 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1707 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1708 } 1709 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1710 "MatStoreValues_MPISBAIJ", 1711 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1712 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1713 "MatRetrieveValues_MPISBAIJ", 1714 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1715 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1716 "MatGetDiagonalBlock_MPISBAIJ", 1717 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1718 PetscFunctionReturn(0); 1719 } 1720 EXTERN_C_END 1721 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1724 /*@C 1725 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1726 the user should preallocate the matrix storage by setting the parameters 1727 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1728 performance can be increased by more than a factor of 50. 1729 1730 Collective on Mat 1731 1732 Input Parameters: 1733 + A - the matrix 1734 . bs - size of blockk 1735 . d_nz - number of block nonzeros per block row in diagonal portion of local 1736 submatrix (same for all local rows) 1737 . d_nnz - array containing the number of block nonzeros in the various block rows 1738 in the upper triangular and diagonal part of the in diagonal portion of the local 1739 (possibly different for each block row) or PETSC_NULL. You must leave room 1740 for the diagonal entry even if it is zero. 1741 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1742 submatrix (same for all local rows). 1743 - o_nnz - array containing the number of nonzeros in the various block rows of the 1744 off-diagonal portion of the local submatrix (possibly different for 1745 each block row) or PETSC_NULL. 1746 1747 1748 Options Database Keys: 1749 . -mat_no_unroll - uses code that does not unroll the loops in the 1750 block calculations (much slower) 1751 . -mat_block_size - size of the blocks to use 1752 1753 Notes: 1754 1755 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1756 than it must be used on all processors that share the object for that argument. 1757 1758 Storage Information: 1759 For a square global matrix we define each processor's diagonal portion 1760 to be its local rows and the corresponding columns (a square submatrix); 1761 each processor's off-diagonal portion encompasses the remainder of the 1762 local matrix (a rectangular submatrix). 1763 1764 The user can specify preallocated storage for the diagonal part of 1765 the local submatrix with either d_nz or d_nnz (not both). Set 1766 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1767 memory allocation. Likewise, specify preallocated storage for the 1768 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1769 1770 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1771 the figure below we depict these three local rows and all columns (0-11). 1772 1773 .vb 1774 0 1 2 3 4 5 6 7 8 9 10 11 1775 ------------------- 1776 row 3 | o o o d d d o o o o o o 1777 row 4 | o o o d d d o o o o o o 1778 row 5 | o o o d d d o o o o o o 1779 ------------------- 1780 .ve 1781 1782 Thus, any entries in the d locations are stored in the d (diagonal) 1783 submatrix, and any entries in the o locations are stored in the 1784 o (off-diagonal) submatrix. Note that the d matrix is stored in 1785 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1786 1787 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1788 plus the diagonal part of the d matrix, 1789 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1790 In general, for PDE problems in which most nonzeros are near the diagonal, 1791 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1792 or you will get TERRIBLE performance; see the users' manual chapter on 1793 matrices. 1794 1795 Level: intermediate 1796 1797 .keywords: matrix, block, aij, compressed row, sparse, parallel 1798 1799 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1800 @*/ 1801 1802 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1803 { 1804 Mat_MPISBAIJ *b; 1805 int ierr,i,mbs,Mbs; 1806 PetscTruth flg2; 1807 1808 PetscFunctionBegin; 1809 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr); 1810 if (!flg2) PetscFunctionReturn(0); 1811 1812 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1813 1814 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1815 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1816 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1817 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1818 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1819 if (d_nnz) { 1820 for (i=0; i<B->m/bs; i++) { 1821 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1822 } 1823 } 1824 if (o_nnz) { 1825 for (i=0; i<B->m/bs; i++) { 1826 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1827 } 1828 } 1829 B->preallocated = PETSC_TRUE; 1830 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1831 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1832 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1833 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1834 1835 b = (Mat_MPISBAIJ*)B->data; 1836 mbs = B->m/bs; 1837 Mbs = B->M/bs; 1838 if (mbs*bs != B->m) { 1839 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1840 } 1841 1842 b->bs = bs; 1843 b->bs2 = bs*bs; 1844 b->mbs = mbs; 1845 b->nbs = mbs; 1846 b->Mbs = Mbs; 1847 b->Nbs = Mbs; 1848 1849 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1850 b->rowners[0] = 0; 1851 for (i=2; i<=b->size; i++) { 1852 b->rowners[i] += b->rowners[i-1]; 1853 } 1854 b->rstart = b->rowners[b->rank]; 1855 b->rend = b->rowners[b->rank+1]; 1856 b->cstart = b->rstart; 1857 b->cend = b->rend; 1858 for (i=0; i<=b->size; i++) { 1859 b->rowners_bs[i] = b->rowners[i]*bs; 1860 } 1861 b->rstart_bs = b-> rstart*bs; 1862 b->rend_bs = b->rend*bs; 1863 1864 b->cstart_bs = b->cstart*bs; 1865 b->cend_bs = b->cend*bs; 1866 1867 1868 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1869 PetscLogObjectParent(B,b->A); 1870 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1871 PetscLogObjectParent(B,b->B); 1872 1873 /* build cache for off array entries formed */ 1874 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1875 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatCreateMPISBAIJ" 1881 /*@C 1882 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1883 (block compressed row). For good matrix assembly performance 1884 the user should preallocate the matrix storage by setting the parameters 1885 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1886 performance can be increased by more than a factor of 50. 1887 1888 Collective on MPI_Comm 1889 1890 Input Parameters: 1891 + comm - MPI communicator 1892 . bs - size of blockk 1893 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1894 This value should be the same as the local size used in creating the 1895 y vector for the matrix-vector product y = Ax. 1896 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1897 This value should be the same as the local size used in creating the 1898 x vector for the matrix-vector product y = Ax. 1899 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1900 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1901 . d_nz - number of block nonzeros per block row in diagonal portion of local 1902 submatrix (same for all local rows) 1903 . d_nnz - array containing the number of block nonzeros in the various block rows 1904 in the upper triangular portion of the in diagonal portion of the local 1905 (possibly different for each block block row) or PETSC_NULL. 1906 You must leave room for the diagonal entry even if it is zero. 1907 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1908 submatrix (same for all local rows). 1909 - o_nnz - array containing the number of nonzeros in the various block rows of the 1910 off-diagonal portion of the local submatrix (possibly different for 1911 each block row) or PETSC_NULL. 1912 1913 Output Parameter: 1914 . A - the matrix 1915 1916 Options Database Keys: 1917 . -mat_no_unroll - uses code that does not unroll the loops in the 1918 block calculations (much slower) 1919 . -mat_block_size - size of the blocks to use 1920 . -mat_mpi - use the parallel matrix data structures even on one processor 1921 (defaults to using SeqBAIJ format on one processor) 1922 1923 Notes: 1924 The user MUST specify either the local or global matrix dimensions 1925 (possibly both). 1926 1927 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1928 than it must be used on all processors that share the object for that argument. 1929 1930 Storage Information: 1931 For a square global matrix we define each processor's diagonal portion 1932 to be its local rows and the corresponding columns (a square submatrix); 1933 each processor's off-diagonal portion encompasses the remainder of the 1934 local matrix (a rectangular submatrix). 1935 1936 The user can specify preallocated storage for the diagonal part of 1937 the local submatrix with either d_nz or d_nnz (not both). Set 1938 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1939 memory allocation. Likewise, specify preallocated storage for the 1940 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1941 1942 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1943 the figure below we depict these three local rows and all columns (0-11). 1944 1945 .vb 1946 0 1 2 3 4 5 6 7 8 9 10 11 1947 ------------------- 1948 row 3 | o o o d d d o o o o o o 1949 row 4 | o o o d d d o o o o o o 1950 row 5 | o o o d d d o o o o o o 1951 ------------------- 1952 .ve 1953 1954 Thus, any entries in the d locations are stored in the d (diagonal) 1955 submatrix, and any entries in the o locations are stored in the 1956 o (off-diagonal) submatrix. Note that the d matrix is stored in 1957 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1958 1959 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1960 plus the diagonal part of the d matrix, 1961 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1962 In general, for PDE problems in which most nonzeros are near the diagonal, 1963 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1964 or you will get TERRIBLE performance; see the users' manual chapter on 1965 matrices. 1966 1967 Level: intermediate 1968 1969 .keywords: matrix, block, aij, compressed row, sparse, parallel 1970 1971 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1972 @*/ 1973 1974 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1975 { 1976 int ierr,size; 1977 1978 PetscFunctionBegin; 1979 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1980 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1981 if (size > 1) { 1982 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1983 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1984 } else { 1985 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1986 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 1992 #undef __FUNCT__ 1993 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1994 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1995 { 1996 Mat mat; 1997 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1998 int ierr,len=0; 1999 2000 PetscFunctionBegin; 2001 *newmat = 0; 2002 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2003 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2004 mat->preallocated = PETSC_TRUE; 2005 a = (Mat_MPISBAIJ*)mat->data; 2006 a->bs = oldmat->bs; 2007 a->bs2 = oldmat->bs2; 2008 a->mbs = oldmat->mbs; 2009 a->nbs = oldmat->nbs; 2010 a->Mbs = oldmat->Mbs; 2011 a->Nbs = oldmat->Nbs; 2012 2013 a->rstart = oldmat->rstart; 2014 a->rend = oldmat->rend; 2015 a->cstart = oldmat->cstart; 2016 a->cend = oldmat->cend; 2017 a->size = oldmat->size; 2018 a->rank = oldmat->rank; 2019 a->donotstash = oldmat->donotstash; 2020 a->roworiented = oldmat->roworiented; 2021 a->rowindices = 0; 2022 a->rowvalues = 0; 2023 a->getrowactive = PETSC_FALSE; 2024 a->barray = 0; 2025 a->rstart_bs = oldmat->rstart_bs; 2026 a->rend_bs = oldmat->rend_bs; 2027 a->cstart_bs = oldmat->cstart_bs; 2028 a->cend_bs = oldmat->cend_bs; 2029 2030 /* hash table stuff */ 2031 a->ht = 0; 2032 a->hd = 0; 2033 a->ht_size = 0; 2034 a->ht_flag = oldmat->ht_flag; 2035 a->ht_fact = oldmat->ht_fact; 2036 a->ht_total_ct = 0; 2037 a->ht_insert_ct = 0; 2038 2039 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2040 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2041 a->cowners = a->rowners + a->size + 2; 2042 a->rowners_bs = a->cowners + a->size + 2; 2043 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2044 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2045 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2046 if (oldmat->colmap) { 2047 #if defined (PETSC_USE_CTABLE) 2048 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2049 #else 2050 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2051 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2052 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2053 #endif 2054 } else a->colmap = 0; 2055 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2056 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2057 PetscLogObjectMemory(mat,len*sizeof(int)); 2058 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2059 } else a->garray = 0; 2060 2061 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2062 PetscLogObjectParent(mat,a->lvec); 2063 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2064 2065 PetscLogObjectParent(mat,a->Mvctx); 2066 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2067 PetscLogObjectParent(mat,a->A); 2068 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2069 PetscLogObjectParent(mat,a->B); 2070 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2071 *newmat = mat; 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #include "petscsys.h" 2076 2077 EXTERN_C_BEGIN 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatLoad_MPISBAIJ" 2080 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2081 { 2082 Mat A; 2083 int i,nz,ierr,j,rstart,rend,fd; 2084 PetscScalar *vals,*buf; 2085 MPI_Comm comm = ((PetscObject)viewer)->comm; 2086 MPI_Status status; 2087 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2088 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2089 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2090 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2091 int dcount,kmax,k,nzcount,tmp; 2092 2093 PetscFunctionBegin; 2094 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2095 2096 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2097 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2098 if (!rank) { 2099 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2100 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2101 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2102 if (header[3] < 0) { 2103 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2104 } 2105 } 2106 2107 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2108 M = header[1]; N = header[2]; 2109 2110 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2111 2112 /* 2113 This code adds extra rows to make sure the number of rows is 2114 divisible by the blocksize 2115 */ 2116 Mbs = M/bs; 2117 extra_rows = bs - M + bs*(Mbs); 2118 if (extra_rows == bs) extra_rows = 0; 2119 else Mbs++; 2120 if (extra_rows &&!rank) { 2121 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2122 } 2123 2124 /* determine ownership of all rows */ 2125 mbs = Mbs/size + ((Mbs % size) > rank); 2126 m = mbs*bs; 2127 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2128 browners = rowners + size + 1; 2129 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2130 rowners[0] = 0; 2131 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2132 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2133 rstart = rowners[rank]; 2134 rend = rowners[rank+1]; 2135 2136 /* distribute row lengths to all processors */ 2137 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2138 if (!rank) { 2139 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2140 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2141 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2142 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2143 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2144 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2145 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2146 } else { 2147 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2148 } 2149 2150 if (!rank) { /* procs[0] */ 2151 /* calculate the number of nonzeros on each processor */ 2152 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2153 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2154 for (i=0; i<size; i++) { 2155 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2156 procsnz[i] += rowlengths[j]; 2157 } 2158 } 2159 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2160 2161 /* determine max buffer needed and allocate it */ 2162 maxnz = 0; 2163 for (i=0; i<size; i++) { 2164 maxnz = PetscMax(maxnz,procsnz[i]); 2165 } 2166 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2167 2168 /* read in my part of the matrix column indices */ 2169 nz = procsnz[0]; 2170 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2171 mycols = ibuf; 2172 if (size == 1) nz -= extra_rows; 2173 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2174 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2175 2176 /* read in every ones (except the last) and ship off */ 2177 for (i=1; i<size-1; i++) { 2178 nz = procsnz[i]; 2179 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2180 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2181 } 2182 /* read in the stuff for the last proc */ 2183 if (size != 1) { 2184 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2185 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2186 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2187 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2188 } 2189 ierr = PetscFree(cols);CHKERRQ(ierr); 2190 } else { /* procs[i], i>0 */ 2191 /* determine buffer space needed for message */ 2192 nz = 0; 2193 for (i=0; i<m; i++) { 2194 nz += locrowlens[i]; 2195 } 2196 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2197 mycols = ibuf; 2198 /* receive message of column indices*/ 2199 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2200 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2201 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2202 } 2203 2204 /* loop over local rows, determining number of off diagonal entries */ 2205 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2206 odlens = dlens + (rend-rstart); 2207 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2208 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2209 masked1 = mask + Mbs; 2210 masked2 = masked1 + Mbs; 2211 rowcount = 0; nzcount = 0; 2212 for (i=0; i<mbs; i++) { 2213 dcount = 0; 2214 odcount = 0; 2215 for (j=0; j<bs; j++) { 2216 kmax = locrowlens[rowcount]; 2217 for (k=0; k<kmax; k++) { 2218 tmp = mycols[nzcount++]/bs; /* block col. index */ 2219 if (!mask[tmp]) { 2220 mask[tmp] = 1; 2221 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2222 else masked1[dcount++] = tmp; /* entry in diag portion */ 2223 } 2224 } 2225 rowcount++; 2226 } 2227 2228 dlens[i] = dcount; /* d_nzz[i] */ 2229 odlens[i] = odcount; /* o_nzz[i] */ 2230 2231 /* zero out the mask elements we set */ 2232 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2233 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2234 } 2235 2236 /* create our matrix */ 2237 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2238 CHKERRQ(ierr); 2239 A = *newmat; 2240 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2241 2242 if (!rank) { 2243 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2244 /* read in my part of the matrix numerical values */ 2245 nz = procsnz[0]; 2246 vals = buf; 2247 mycols = ibuf; 2248 if (size == 1) nz -= extra_rows; 2249 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2250 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2251 2252 /* insert into matrix */ 2253 jj = rstart*bs; 2254 for (i=0; i<m; i++) { 2255 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2256 mycols += locrowlens[i]; 2257 vals += locrowlens[i]; 2258 jj++; 2259 } 2260 2261 /* read in other processors (except the last one) and ship out */ 2262 for (i=1; i<size-1; i++) { 2263 nz = procsnz[i]; 2264 vals = buf; 2265 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2266 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2267 } 2268 /* the last proc */ 2269 if (size != 1){ 2270 nz = procsnz[i] - extra_rows; 2271 vals = buf; 2272 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2273 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2274 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2275 } 2276 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2277 2278 } else { 2279 /* receive numeric values */ 2280 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2281 2282 /* receive message of values*/ 2283 vals = buf; 2284 mycols = ibuf; 2285 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2286 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2287 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2288 2289 /* insert into matrix */ 2290 jj = rstart*bs; 2291 for (i=0; i<m; i++) { 2292 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2293 mycols += locrowlens[i]; 2294 vals += locrowlens[i]; 2295 jj++; 2296 } 2297 } 2298 2299 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2300 ierr = PetscFree(buf);CHKERRQ(ierr); 2301 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2302 ierr = PetscFree(rowners);CHKERRQ(ierr); 2303 ierr = PetscFree(dlens);CHKERRQ(ierr); 2304 ierr = PetscFree(mask);CHKERRQ(ierr); 2305 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2307 PetscFunctionReturn(0); 2308 } 2309 EXTERN_C_END 2310 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2313 /*@ 2314 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2315 2316 Input Parameters: 2317 . mat - the matrix 2318 . fact - factor 2319 2320 Collective on Mat 2321 2322 Level: advanced 2323 2324 Notes: 2325 This can also be set by the command line option: -mat_use_hash_table fact 2326 2327 .keywords: matrix, hashtable, factor, HT 2328 2329 .seealso: MatSetOption() 2330 @*/ 2331 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2332 { 2333 PetscFunctionBegin; 2334 SETERRQ(1,"Function not yet written for SBAIJ format"); 2335 /* PetscFunctionReturn(0); */ 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2340 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2341 { 2342 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2343 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2344 PetscReal atmp; 2345 PetscReal *work,*svalues,*rvalues; 2346 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2347 int rank,size,*rowners_bs,dest,count,source; 2348 PetscScalar *va; 2349 MatScalar *ba; 2350 MPI_Status stat; 2351 2352 PetscFunctionBegin; 2353 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2354 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2355 2356 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 2357 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 2358 2359 bs = a->bs; 2360 mbs = a->mbs; 2361 Mbs = a->Mbs; 2362 ba = b->a; 2363 bi = b->i; 2364 bj = b->j; 2365 /* 2366 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2367 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2368 */ 2369 2370 /* find ownerships */ 2371 rowners_bs = a->rowners_bs; 2372 /* 2373 if (!rank){ 2374 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2375 } 2376 */ 2377 2378 /* each proc creates an array to be distributed */ 2379 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2380 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2381 2382 /* row_max for B */ 2383 if (rank != size-1){ 2384 for (i=0; i<mbs; i++) { 2385 ncols = bi[1] - bi[0]; bi++; 2386 brow = bs*i; 2387 for (j=0; j<ncols; j++){ 2388 bcol = bs*(*bj); 2389 for (kcol=0; kcol<bs; kcol++){ 2390 col = bcol + kcol; /* local col index */ 2391 col += rowners_bs[rank+1]; /* global col index */ 2392 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2393 for (krow=0; krow<bs; krow++){ 2394 atmp = PetscAbsScalar(*ba); ba++; 2395 row = brow + krow; /* local row index */ 2396 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2397 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2398 if (work[col] < atmp) work[col] = atmp; 2399 } 2400 } 2401 bj++; 2402 } 2403 } 2404 /* 2405 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2406 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2407 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2408 */ 2409 2410 /* send values to its owners */ 2411 for (dest=rank+1; dest<size; dest++){ 2412 svalues = work + rowners_bs[dest]; 2413 count = rowners_bs[dest+1]-rowners_bs[dest]; 2414 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr); 2415 /* 2416 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2417 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2418 */ 2419 } 2420 } 2421 2422 /* receive values */ 2423 if (rank){ 2424 rvalues = work; 2425 count = rowners_bs[rank+1]-rowners_bs[rank]; 2426 for (source=0; source<rank; source++){ 2427 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr); 2428 /* process values */ 2429 for (i=0; i<count; i++){ 2430 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2431 } 2432 /* 2433 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2434 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2435 */ 2436 } 2437 } 2438 2439 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2440 ierr = PetscFree(work);CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "MatRelax_MPISBAIJ" 2446 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2447 { 2448 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2449 int ierr,mbs=mat->mbs,bs=mat->bs; 2450 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2451 Vec bb1; 2452 2453 PetscFunctionBegin; 2454 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2455 if (bs > 1) 2456 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2457 2458 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2459 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2460 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2461 its--; 2462 } 2463 2464 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2465 while (its--){ 2466 2467 /* lower triangular part: slvec0b = - B^T*xx */ 2468 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2469 2470 /* copy xx into slvec0a */ 2471 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2472 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2473 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2474 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2475 2476 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2477 2478 /* copy bb into slvec1a */ 2479 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2480 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2481 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2482 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2483 2484 /* set slvec1b = 0 */ 2485 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2486 2487 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2488 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2489 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2490 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2491 2492 /* upper triangular part: bb1 = bb1 - B*x */ 2493 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2494 2495 /* local diagonal sweep */ 2496 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2497 } 2498 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2499 } else { 2500 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2501 } 2502 PetscFunctionReturn(0); 2503 } 2504 2505 #undef __FUNCT__ 2506 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2507 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2508 { 2509 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2510 int ierr; 2511 PetscScalar mone=-1.0; 2512 Vec lvec1,bb1; 2513 2514 PetscFunctionBegin; 2515 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2516 if (mat->bs > 1) 2517 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2518 2519 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2520 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2521 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2522 its--; 2523 } 2524 2525 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2526 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2527 while (its--){ 2528 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2529 2530 /* lower diagonal part: bb1 = bb - B^T*xx */ 2531 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2532 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2533 2534 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2535 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2536 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2537 2538 /* upper diagonal part: bb1 = bb1 - B*x */ 2539 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2540 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2541 2542 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2543 2544 /* diagonal sweep */ 2545 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2546 } 2547 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2548 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2549 } else { 2550 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2551 } 2552 PetscFunctionReturn(0); 2553 } 2554 2555