1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 extern int MatUseSpooles_MPISBAIJ(Mat); 24 25 /* UGLY, ugly, ugly 26 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 27 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 28 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 29 converts the entries into single precision and then calls ..._MatScalar() to put them 30 into the single precision data structures. 
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
/* When MatScalar == PetscScalar the ..._MatScalar() names are just aliases */
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
/*
   MatStoreValues_MPISBAIJ - saves the numerical values of both the diagonal (A)
   and off-diagonal (B) sequential blocks so they can later be restored with
   MatRetrieveValues_MPISBAIJ(). Delegates to MatStoreValues() on each block.
*/
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
/*
   MatRetrieveValues_MPISBAIJ - restores the numerical values of the diagonal (A)
   and off-diagonal (B) blocks previously saved by MatStoreValues_MPISBAIJ().
*/
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
81 */ 82 #undef __FUNCT__ 83 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 84 static int CreateColmap_MPISBAIJ_Private(Mat mat) 85 { 86 PetscFunctionBegin; 87 SETERRQ(1,"Function not yet written for SBAIJ format"); 88 /* PetscFunctionReturn(0); */ 89 } 90 91 #define CHUNKSIZE 10 92 93 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 94 { \ 95 \ 96 brow = row/bs; \ 97 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 98 rmax = aimax[brow]; nrow = ailen[brow]; \ 99 bcol = col/bs; \ 100 ridx = row % bs; cidx = col % bs; \ 101 low = 0; high = nrow; \ 102 while (high-low > 3) { \ 103 t = (low+high)/2; \ 104 if (rp[t] > bcol) high = t; \ 105 else low = t; \ 106 } \ 107 for (_i=low; _i<high; _i++) { \ 108 if (rp[_i] > bcol) break; \ 109 if (rp[_i] == bcol) { \ 110 bap = ap + bs2*_i + bs*cidx + ridx; \ 111 if (addv == ADD_VALUES) *bap += value; \ 112 else *bap = value; \ 113 goto a_noinsert; \ 114 } \ 115 } \ 116 if (a->nonew == 1) goto a_noinsert; \ 117 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 118 if (nrow >= rmax) { \ 119 /* there is no extra room in row, therefore enlarge */ \ 120 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 121 MatScalar *new_a; \ 122 \ 123 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 124 \ 125 /* malloc new storage space */ \ 126 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 127 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 128 new_j = (int*)(new_a + bs2*new_nz); \ 129 new_i = new_j + new_nz; \ 130 \ 131 /* copy over old data into new slots */ \ 132 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 133 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 134 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 135 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 136 ierr = 
PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 137 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 139 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 140 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 141 /* free up old matrix storage */ \ 142 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 143 if (!a->singlemalloc) { \ 144 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 145 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 146 } \ 147 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 148 a->singlemalloc = PETSC_TRUE; \ 149 \ 150 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 151 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 152 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 153 a->s_maxnz += bs2*CHUNKSIZE; \ 154 a->reallocs++; \ 155 a->s_nz++; \ 156 } \ 157 N = nrow++ - 1; \ 158 /* shift up all the later entries in this row */ \ 159 for (ii=N; ii>=_i; ii--) { \ 160 rp[ii+1] = rp[ii]; \ 161 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 162 } \ 163 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 164 rp[_i] = bcol; \ 165 ap[bs2*_i + bs*cidx + ridx] = value; \ 166 a_noinsert:; \ 167 ailen[brow] = nrow; \ 168 } 169 #ifndef MatSetValues_SeqBAIJ_B_Private 170 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 171 { \ 172 brow = row/bs; \ 173 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 174 rmax = bimax[brow]; nrow = bilen[brow]; \ 175 bcol = col/bs; \ 176 ridx = row % bs; cidx = col % bs; \ 177 low = 0; high = nrow; \ 178 while (high-low > 3) { \ 179 t = (low+high)/2; \ 180 if (rp[t] > bcol) high = t; \ 181 else low = t; \ 182 } \ 183 for (_i=low; _i<high; _i++) { \ 184 if (rp[_i] > bcol) break; \ 185 if (rp[_i] == bcol) { \ 186 bap = ap + bs2*_i + bs*cidx + 
ridx; \ 187 if (addv == ADD_VALUES) *bap += value; \ 188 else *bap = value; \ 189 goto b_noinsert; \ 190 } \ 191 } \ 192 if (b->nonew == 1) goto b_noinsert; \ 193 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 194 if (nrow >= rmax) { \ 195 /* there is no extra room in row, therefore enlarge */ \ 196 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 197 MatScalar *new_a; \ 198 \ 199 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 200 \ 201 /* malloc new storage space */ \ 202 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 203 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 204 new_j = (int*)(new_a + bs2*new_nz); \ 205 new_i = new_j + new_nz; \ 206 \ 207 /* copy over old data into new slots */ \ 208 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 209 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 210 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 211 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 212 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 213 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 214 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 215 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 216 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 217 /* free up old matrix storage */ \ 218 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 219 if (!b->singlemalloc) { \ 220 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 221 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 222 } \ 223 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 224 b->singlemalloc = PETSC_TRUE; \ 225 \ 226 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 227 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 228 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + 
bs2*sizeof(MatScalar))); \ 229 b->maxnz += bs2*CHUNKSIZE; \ 230 b->reallocs++; \ 231 b->nz++; \ 232 } \ 233 N = nrow++ - 1; \ 234 /* shift up all the later entries in this row */ \ 235 for (ii=N; ii>=_i; ii--) { \ 236 rp[ii+1] = rp[ii]; \ 237 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 238 } \ 239 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 240 rp[_i] = bcol; \ 241 ap[bs2*_i + bs*cidx + ridx] = value; \ 242 b_noinsert:; \ 243 bilen[brow] = nrow; \ 244 } 245 #endif 246 247 #if defined(PETSC_USE_MAT_SINGLE) 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatSetValues_MPISBAIJ" 250 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 251 { 252 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 253 int ierr,i,N = m*n; 254 MatScalar *vsingle; 255 256 PetscFunctionBegin; 257 if (N > b->setvalueslen) { 258 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 259 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 260 b->setvalueslen = N; 261 } 262 vsingle = b->setvaluescopy; 263 264 for (i=0; i<N; i++) { 265 vsingle[i] = v[i]; 266 } 267 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 273 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 274 { 275 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 276 int ierr,i,N = m*n*b->bs2; 277 MatScalar *vsingle; 278 279 PetscFunctionBegin; 280 if (N > b->setvalueslen) { 281 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 282 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 283 b->setvalueslen = N; 284 } 285 vsingle = b->setvaluescopy; 286 for (i=0; i<N; i++) { 287 vsingle[i] = v[i]; 288 } 289 ierr = 
MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT" 295 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 296 { 297 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 298 int ierr,i,N = m*n; 299 MatScalar *vsingle; 300 301 PetscFunctionBegin; 302 SETERRQ(1,"Function not yet written for SBAIJ format"); 303 /* PetscFunctionReturn(0); */ 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT" 308 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 int ierr,i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 SETERRQ(1,"Function not yet written for SBAIJ format"); 316 /* PetscFunctionReturn(0); */ 317 } 318 #endif 319 320 /* Only add/insert a(i,j) with i<=j (blocks). 321 Any a(i,j) with i>j input by user is ingored. 
322 */ 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatSetValues_MPIBAIJ" 325 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 326 { 327 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 328 MatScalar value; 329 PetscTruth roworiented = baij->roworiented; 330 int ierr,i,j,row,col; 331 int rstart_orig=baij->rstart_bs; 332 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 333 int cend_orig=baij->cend_bs,bs=baij->bs; 334 335 /* Some Variables required in the macro */ 336 Mat A = baij->A; 337 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 338 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 339 MatScalar *aa=a->a; 340 341 Mat B = baij->B; 342 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 343 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 344 MatScalar *ba=b->a; 345 346 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 347 int low,high,t,ridx,cidx,bs2=a->bs2; 348 MatScalar *ap,*bap; 349 350 /* for stash */ 351 int n_loc, *in_loc=0; 352 MatScalar *v_loc=0; 353 354 PetscFunctionBegin; 355 356 if(!baij->donotstash){ 357 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 358 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 359 } 360 361 for (i=0; i<m; i++) { 362 if (im[i] < 0) continue; 363 #if defined(PETSC_USE_BOPT_g) 364 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 365 #endif 366 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 367 row = im[i] - rstart_orig; /* local row index */ 368 for (j=0; j<n; j++) { 369 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 370 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 371 col = in[j] - cstart_orig; /* local col index */ 372 brow = row/bs; bcol = col/bs; 373 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 374 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 375 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 376 /* ierr = 
MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 377 } else if (in[j] < 0) continue; 378 #if defined(PETSC_USE_BOPT_g) 379 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 380 #endif 381 else { /* off-diag entry (B) */ 382 if (mat->was_assembled) { 383 if (!baij->colmap) { 384 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 385 } 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 388 col = col - 1; 389 #else 390 col = baij->colmap[in[j]/bs] - 1; 391 #endif 392 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 393 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 394 col = in[j]; 395 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 396 B = baij->B; 397 b = (Mat_SeqBAIJ*)(B)->data; 398 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 399 ba=b->a; 400 } else col += in[j]%bs; 401 } else col = in[j]; 402 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 403 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 404 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 405 } 406 } 407 } else { /* off processor entry */ 408 if (!baij->donotstash) { 409 n_loc = 0; 410 for (j=0; j<n; j++){ 411 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 412 in_loc[n_loc] = in[j]; 413 if (roworiented) { 414 v_loc[n_loc] = v[i*n+j]; 415 } else { 416 v_loc[n_loc] = v[j*m+i]; 417 } 418 n_loc++; 419 } 420 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 421 } 422 } 423 } 424 425 if(!baij->donotstash){ 426 ierr = PetscFree(in_loc);CHKERRQ(ierr); 427 ierr = PetscFree(v_loc);CHKERRQ(ierr); 428 } 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 434 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 435 { 436 PetscFunctionBegin; 437 
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

/* multiplicative hash on the golden ratio; tmp holds the fractional part */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
/* Hash-table variant: not implemented for SBAIJ; always raises an error. */
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
/* Blocked hash-table variant: not implemented for SBAIJ; always raises an error. */
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/*
   MatGetValues_MPISBAIJ - retrieves values at (idxm,idxn) from the locally
   owned rows. Local columns are read from the diagonal block A; other columns
   are mapped through colmap/garray and read from the off-diagonal block B,
   returning 0.0 for positions not stored there. Non-local rows raise an error.
*/
int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column owned locally: read from the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not present in B: the value is an (unstored) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
/*
   MatNorm_MPISBAIJ - computes a matrix norm. Only NORM_FROBENIUS is supported
   in parallel: ||M||_F^2 = sum ||A_p||_F^2 + 2*sum ||B_p||_F^2, the factor 2
   accounting for the off-diagonal blocks stored only once in symmetric format.
*/
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      /*
      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
      PetscSynchronizedFlush(PETSC_COMM_WORLD); */

      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr =
PetscFree(lnorm2);CHKERRQ(ierr); 538 } else { 539 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 540 } 541 } 542 PetscFunctionReturn(0); 543 } 544 545 /* 546 Creates the hash table, and sets the table 547 This table is created only once. 548 If new entried need to be added to the matrix 549 then the hash table has to be destroyed and 550 recreated. 551 */ 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 554 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 555 { 556 PetscFunctionBegin; 557 SETERRQ(1,"Function not yet written for SBAIJ format"); 558 /* PetscFunctionReturn(0); */ 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 563 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 564 { 565 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 566 int ierr,nstash,reallocs; 567 InsertMode addv; 568 569 PetscFunctionBegin; 570 if (baij->donotstash) { 571 PetscFunctionReturn(0); 572 } 573 574 /* make sure all processors are either in INSERTMODE or ADDMODE */ 575 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 576 if (addv == (ADD_VALUES|INSERT_VALUES)) { 577 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 578 } 579 mat->insertmode = addv; /* in case this processor had no cache */ 580 581 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 582 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 583 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 584 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 585 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 586 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ 
"MatAssemblyEnd_MPISBAIJ" 592 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 593 { 594 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 595 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 596 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 597 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 598 int *row,*col,other_disassembled; 599 PetscTruth r1,r2,r3; 600 MatScalar *val; 601 InsertMode addv = mat->insertmode; 602 #if defined(PETSC_HAVE_SPOOLES) 603 PetscTruth flag; 604 #endif 605 606 PetscFunctionBegin; 607 608 if (!baij->donotstash) { 609 while (1) { 610 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 611 /* 612 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 613 PetscSynchronizedFlush(PETSC_COMM_WORLD); 614 */ 615 if (!flg) break; 616 617 for (i=0; i<n;) { 618 /* Now identify the consecutive vals belonging to the same row */ 619 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 620 if (j < n) ncols = j-i; 621 else ncols = n-i; 622 /* Now assemble all these values with a single function call */ 623 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 624 i = j; 625 } 626 } 627 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 628 /* Now process the block-stash. 
Since the values are stashed column-oriented, 629 set the roworiented flag to column oriented, and after MatSetValues() 630 restore the original flags */ 631 r1 = baij->roworiented; 632 r2 = a->roworiented; 633 r3 = b->roworiented; 634 baij->roworiented = PETSC_FALSE; 635 a->roworiented = PETSC_FALSE; 636 b->roworiented = PETSC_FALSE; 637 while (1) { 638 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 639 if (!flg) break; 640 641 for (i=0; i<n;) { 642 /* Now identify the consecutive vals belonging to the same row */ 643 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 644 if (j < n) ncols = j-i; 645 else ncols = n-i; 646 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 647 i = j; 648 } 649 } 650 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 651 baij->roworiented = r1; 652 a->roworiented = r2; 653 b->roworiented = r3; 654 } 655 656 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 657 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 658 659 /* determine if any processor has disassembled, if so we must 660 also disassemble ourselfs, in order that we may reassemble. 
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* debug-build statistics for the (optional) hash-table insertion path */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* switch future MatSetValues calls to the hash-table variants if requested
     (note: MatCreateHashTable_MPISBAIJ_Private currently errors for SBAIJ) */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  /* invalidate any row-access workspace left over from MatGetRow */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPISBAIJ_ASCIIorDraworSocket - viewer backend for MPISBAIJ.
   ASCII info formats print per-process statistics; otherwise the matrix is
   gathered onto process 0 (by rebuilding it with global MatSetValues calls)
   and viewed through a singleton viewer.
*/
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    int          M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* process 0 gets the whole matrix; everyone else contributes 0 local rows */
    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);          /* global rows of this block row */
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;           /* global first column of the block */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;            /* map compressed B column to global */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr =
MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
/*
   MatView_MPISBAIJ - viewer dispatch: ASCII, draw, socket and binary viewers
   all route to MatView_MPISBAIJ_ASCIIorDraworSocket(); anything else errors.
*/
int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   MatDestroy_MPISBAIJ - releases everything owned by the parallel SBAIJ
   structure: both stashes, the A and B blocks, the column map, the gather
   array, all work vectors and scatters, and finally the struct itself.
*/
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  /* slvec0b aliases storage tied to slvec0 (and slvec1a/b to slvec1), so they
     are destroyed together with their parent */
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
/*
   MatMult_MPISBAIJ - y = M*x for the symmetric format using a single scatter:
   y = A*x (diagonal block) plus both the upper (B) and, via B^T, the mirrored
   lower off-diagonal contributions combined through the slvec0/slvec1 work
   vectors and the sMvctx scatter.
   NOTE(review): the PetscMemcpy on the continuation of this function copies
   bs*mbs*sizeof(MatScalar) bytes between PetscScalar arrays (x and slvec0);
   presumably should be sizeof(PetscScalar) -- verify under PETSC_USE_MAT_SINGLE.
*/
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
  PetscSynchronizedFlush(PETSC_COMM_WORLD);
  */
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr =
VecGetArray(xx,&x);CHKERRQ(ierr); 907 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 908 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 909 910 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 911 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 912 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 913 914 /* supperdiagonal part */ 915 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 916 917 PetscFunctionReturn(0); 918 } 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 922 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 923 { 924 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 925 int ierr,nt; 926 927 PetscFunctionBegin; 928 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 929 if (nt != A->n) { 930 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 931 } 932 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 933 if (nt != A->m) { 934 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 935 } 936 937 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 938 /* do diagonal part */ 939 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 940 /* do supperdiagonal part */ 941 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 942 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 943 /* do subdiagonal part */ 944 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 945 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 946 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 947 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 953 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 954 { 955 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 956 int ierr,mbs=a->mbs,bs=a->bs; 957 PetscScalar 
*x,*from,zero=0.0; 958 959 PetscFunctionBegin; 960 /* 961 PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n"); 962 PetscSynchronizedFlush(PETSC_COMM_WORLD); 963 */ 964 /* diagonal part */ 965 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 966 ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr); 967 968 /* subdiagonal part */ 969 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 970 971 /* copy x into the vec slvec0 */ 972 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 973 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 974 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 975 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 976 977 ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 978 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 979 ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr); 980 981 /* supperdiagonal part */ 982 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 983 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 989 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 990 { 991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 992 int ierr; 993 994 PetscFunctionBegin; 995 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 996 /* do diagonal part */ 997 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 998 /* do supperdiagonal part */ 999 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1000 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1001 1002 /* do subdiagonal part */ 1003 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1005 ierr = 
VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
/* For a symmetric matrix A^T == A, so transpose products are rejected and the
   caller is directed to the plain multiply. */
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  /* the global diagonal lives entirely in the local diagonal block a->A */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
/* Scales both the diagonal (A) and off-diagonal (B) local blocks by *aa. */
int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   MatGetRow_MPISBAIJ - Returns one global row (global point indices, not
   block indices) by merging the matching rows of the local A and B blocks.
   Only rows owned by this process may be requested.
*/
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;
  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int          max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the value and the index workspace */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart; /* local row index */

  /* request columns/values from A and B only if the caller asked for them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column is < cstart,
         i.e. the B entries that precede all of A's entries */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
/* Releases the per-row workspace handed out by MatGetRow_MPISBAIJ; the
   buffers themselves are cached on the matrix, so only the flag is reset. */
int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
/* Zeroes both local blocks; the nonzero structure is kept. */
int MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
/*
   MatGetInfo_MPISBAIJ - Sums the statistics of the two local blocks and,
   for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM, reduces them across the communicator.
*/
int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat          A = a->A,B = a->B;   /* local diagonal / off-diagonal blocks */
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] =
info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  /* NOTE(review): these use A (= a->A, the local diagonal block) rather than
     matin, and columns_local is set from A->N rather than a local column
     count — both look suspicious; confirm against the MPIBAIJ counterpart
     before changing */
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/* Forwards most options to the two local blocks; a few only toggle flags on
   the parallel wrapper (stashing, hash table, row/column orientation). */
int MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr =
MatSetOption(a->A,op);CHKERRQ(ierr); 1245 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_ROW_ORIENTED: 1248 a->roworiented = PETSC_TRUE; 1249 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1250 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1251 break; 1252 case MAT_ROWS_SORTED: 1253 case MAT_ROWS_UNSORTED: 1254 case MAT_YES_NEW_DIAGONALS: 1255 case MAT_USE_SINGLE_PRECISION_SOLVES: 1256 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1257 break; 1258 case MAT_COLUMN_ORIENTED: 1259 a->roworiented = PETSC_FALSE; 1260 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1261 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1262 break; 1263 case MAT_IGNORE_OFF_PROC_ENTRIES: 1264 a->donotstash = PETSC_TRUE; 1265 break; 1266 case MAT_NO_NEW_DIAGONALS: 1267 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1268 case MAT_USE_HASH_TABLE: 1269 a->ht_flag = PETSC_TRUE; 1270 break; 1271 default: 1272 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1273 } 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatTranspose_MPISBAIJ(" 1279 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1280 { 1281 PetscFunctionBegin; 1282 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 1283 /* PetscFunctionReturn(0); */ 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1288 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1289 { 1290 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1291 Mat a = baij->A,b = baij->B; 1292 int ierr,s1,s2,s3; 1293 1294 PetscFunctionBegin; 1295 if (ll != rr) { 1296 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1297 } 1298 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1299 if (rr) { 1300 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1301 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1302 /* Overlap communication with computation. 
*/
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    /*} if (ll) { */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the rows of the off-diagonal block while the scatter runs */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
    /* } */
    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* if (rr) { */
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPISBAIJ"
/*
   MatZeroRows_MPISBAIJ - Zeroes the global rows listed in is (point indices,
   not block indices), optionally setting the diagonal entries to *diag.

   Each process may list rows owned by any process, so the requested indices
   are first routed to their owners with a nonblocking send/receive exchange;
   each owner then zaps its own rows locally.
*/
int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work,row;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear ownership search: owners[] is in block units, idx in points */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];        /* max rows any one sender will send us */
  nrecvs = work[size+rank];   /* number of messages we will receive */
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the requested rows by owning process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was advanced by the bucketing above; rebuild it */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;   /* first global point row owned locally */

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local point index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* rectangular diagonal block: insert diagonal entries one at a time */
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
/* Prints the MPISBAIJ option help once per run (rank 0 also prints the
   sequential SBAIJ options for its diagonal block). */
int MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm     comm = A->comm;
  static int   called = 0;   /* suppress duplicate output on repeated calls */
  int          ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
int MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
/* Compares the local A and B blocks of both matrices, then MPI_LANDs the
   result so every process returns the same answer. */
int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat          a,b,c,d;
  PetscTruth   flg;
  int          ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
/* Default preallocation used when the caller never called
   MatMPISBAIJSetPreallocation: block size 1, default nz estimates. */
int MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
/* Function table for the MPISBAIJ type; slot order is fixed by struct _MatOps,
   0 marks an unimplemented operation. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPISBAIJ,
  MatGetRow_MPISBAIJ,
  MatRestoreRow_MPISBAIJ,
  MatMult_MPISBAIJ,
  MatMultAdd_MPISBAIJ,
  MatMultTranspose_MPISBAIJ,
  MatMultTransposeAdd_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatRelax_MPISBAIJ,
  MatTranspose_MPISBAIJ,
  MatGetInfo_MPISBAIJ,
  MatEqual_MPISBAIJ,
  MatGetDiagonal_MPISBAIJ,
  MatDiagonalScale_MPISBAIJ,
  MatNorm_MPISBAIJ,
  MatAssemblyBegin_MPISBAIJ,
  MatAssemblyEnd_MPISBAIJ,
  0,
  MatSetOption_MPISBAIJ,
  MatZeroEntries_MPISBAIJ,
  MatZeroRows_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPISBAIJ,
  MatIncreaseOverlap_MPISBAIJ,
  MatGetValues_MPISBAIJ,
  0,
  MatPrintHelp_MPISBAIJ,
  MatScale_MPISBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPISBAIJ,
  0,
  MatSetValuesBlocked_MPISBAIJ,
  0,
  0,
  0,
  MatGetPetscMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPISBAIJ};


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
/* Hands back the local diagonal block by reference (no copy is made). */
int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   MatCreate_MPISBAIJ - Type constructor: installs the MPISBAIJ ops table and
   initializes the Mat_MPISBAIJ context to an empty, unassembled state.
   Actual storage is allocated later by MatMPISBAIJSetPreallocation().
*/
int MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation shared by rowners, cowners and rowners_bs (freed via
     rowners in MatDestroy_MPISBAIJ) */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;   /* reject degenerate load factors */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr =
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1709 "MatStoreValues_MPISBAIJ", 1710 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1711 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1712 "MatRetrieveValues_MPISBAIJ", 1713 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1714 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1715 "MatGetDiagonalBlock_MPISBAIJ", 1716 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 EXTERN_C_END 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1723 /*@C 1724 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1725 the user should preallocate the matrix storage by setting the parameters 1726 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1727 performance can be increased by more than a factor of 50. 1728 1729 Collective on Mat 1730 1731 Input Parameters: 1732 + A - the matrix 1733 . bs - size of blockk 1734 . d_nz - number of block nonzeros per block row in diagonal portion of local 1735 submatrix (same for all local rows) 1736 . d_nnz - array containing the number of block nonzeros in the various block rows 1737 of the in diagonal portion of the local (possibly different for each block 1738 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1739 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1740 submatrix (same for all local rows). 1741 - o_nnz - array containing the number of nonzeros in the various block rows of the 1742 off-diagonal portion of the local submatrix (possibly different for 1743 each block row) or PETSC_NULL. 1744 1745 1746 Options Database Keys: 1747 . -mat_no_unroll - uses code that does not unroll the loops in the 1748 block calculations (much slower) 1749 . 
-mat_block_size - size of the blocks to use 1750 1751 Notes: 1752 1753 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1754 than it must be used on all processors that share the object for that argument. 1755 1756 Storage Information: 1757 For a square global matrix we define each processor's diagonal portion 1758 to be its local rows and the corresponding columns (a square submatrix); 1759 each processor's off-diagonal portion encompasses the remainder of the 1760 local matrix (a rectangular submatrix). 1761 1762 The user can specify preallocated storage for the diagonal part of 1763 the local submatrix with either d_nz or d_nnz (not both). Set 1764 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1765 memory allocation. Likewise, specify preallocated storage for the 1766 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1767 1768 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1769 the figure below we depict these three local rows and all columns (0-11). 1770 1771 .vb 1772 0 1 2 3 4 5 6 7 8 9 10 11 1773 ------------------- 1774 row 3 | o o o d d d o o o o o o 1775 row 4 | o o o d d d o o o o o o 1776 row 5 | o o o d d d o o o o o o 1777 ------------------- 1778 .ve 1779 1780 Thus, any entries in the d locations are stored in the d (diagonal) 1781 submatrix, and any entries in the o locations are stored in the 1782 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1783 stored simply in the MATSEQBAIJ format for compressed row storage. 1784 1785 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1786 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1787 In general, for PDE problems in which most nonzeros are near the diagonal, 1788 one expects d_nz >> o_nz. 
For large problems you MUST preallocate memory 1789 or you will get TERRIBLE performance; see the users' manual chapter on 1790 matrices. 1791 1792 Level: intermediate 1793 1794 .keywords: matrix, block, aij, compressed row, sparse, parallel 1795 1796 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1797 @*/ 1798 1799 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1800 { 1801 Mat_MPISBAIJ *b; 1802 int ierr,i,mbs,Mbs; 1803 PetscTruth flg2; 1804 1805 PetscFunctionBegin; 1806 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr); 1807 if (!flg2) PetscFunctionReturn(0); 1808 1809 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1810 1811 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1812 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1813 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1814 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1815 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1816 if (d_nnz) { 1817 for (i=0; i<B->m/bs; i++) { 1818 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1819 } 1820 } 1821 if (o_nnz) { 1822 for (i=0; i<B->m/bs; i++) { 1823 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1824 } 1825 } 1826 B->preallocated = PETSC_TRUE; 1827 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1828 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1829 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1830 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1831 1832 b = (Mat_MPISBAIJ*)B->data; 1833 mbs = B->m/bs; 1834 Mbs = B->M/bs; 1835 if (mbs*bs != 
B->m) { 1836 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1837 } 1838 1839 b->bs = bs; 1840 b->bs2 = bs*bs; 1841 b->mbs = mbs; 1842 b->nbs = mbs; 1843 b->Mbs = Mbs; 1844 b->Nbs = Mbs; 1845 1846 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1847 b->rowners[0] = 0; 1848 for (i=2; i<=b->size; i++) { 1849 b->rowners[i] += b->rowners[i-1]; 1850 } 1851 b->rstart = b->rowners[b->rank]; 1852 b->rend = b->rowners[b->rank+1]; 1853 b->cstart = b->rstart; 1854 b->cend = b->rend; 1855 for (i=0; i<=b->size; i++) { 1856 b->rowners_bs[i] = b->rowners[i]*bs; 1857 } 1858 b->rstart_bs = b-> rstart*bs; 1859 b->rend_bs = b->rend*bs; 1860 1861 b->cstart_bs = b->cstart*bs; 1862 b->cend_bs = b->cend*bs; 1863 1864 1865 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1866 PetscLogObjectParent(B,b->A); 1867 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1868 PetscLogObjectParent(B,b->B); 1869 1870 /* build cache for off array entries formed */ 1871 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1872 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatCreateMPISBAIJ" 1878 /*@C 1879 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1880 (block compressed row). For good matrix assembly performance 1881 the user should preallocate the matrix storage by setting the parameters 1882 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1883 performance can be increased by more than a factor of 50. 1884 1885 Collective on MPI_Comm 1886 1887 Input Parameters: 1888 + comm - MPI communicator 1889 . bs - size of blockk 1890 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1891 This value should be the same as the local size used in creating the 1892 y vector for the matrix-vector product y = Ax. 
1893 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1894 This value should be the same as the local size used in creating the 1895 x vector for the matrix-vector product y = Ax. 1896 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1897 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1898 . d_nz - number of block nonzeros per block row in diagonal portion of local 1899 submatrix (same for all local rows) 1900 . d_nnz - array containing the number of block nonzeros in the various block rows 1901 of the in diagonal portion of the local (possibly different for each block 1902 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1903 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1904 submatrix (same for all local rows). 1905 - o_nnz - array containing the number of nonzeros in the various block rows of the 1906 off-diagonal portion of the local submatrix (possibly different for 1907 each block row) or PETSC_NULL. 1908 1909 Output Parameter: 1910 . A - the matrix 1911 1912 Options Database Keys: 1913 . -mat_no_unroll - uses code that does not unroll the loops in the 1914 block calculations (much slower) 1915 . -mat_block_size - size of the blocks to use 1916 . -mat_mpi - use the parallel matrix data structures even on one processor 1917 (defaults to using SeqBAIJ format on one processor) 1918 1919 Notes: 1920 The user MUST specify either the local or global matrix dimensions 1921 (possibly both). 1922 1923 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1924 than it must be used on all processors that share the object for that argument. 
1925 1926 Storage Information: 1927 For a square global matrix we define each processor's diagonal portion 1928 to be its local rows and the corresponding columns (a square submatrix); 1929 each processor's off-diagonal portion encompasses the remainder of the 1930 local matrix (a rectangular submatrix). 1931 1932 The user can specify preallocated storage for the diagonal part of 1933 the local submatrix with either d_nz or d_nnz (not both). Set 1934 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1935 memory allocation. Likewise, specify preallocated storage for the 1936 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1937 1938 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1939 the figure below we depict these three local rows and all columns (0-11). 1940 1941 .vb 1942 0 1 2 3 4 5 6 7 8 9 10 11 1943 ------------------- 1944 row 3 | o o o d d d o o o o o o 1945 row 4 | o o o d d d o o o o o o 1946 row 5 | o o o d d d o o o o o o 1947 ------------------- 1948 .ve 1949 1950 Thus, any entries in the d locations are stored in the d (diagonal) 1951 submatrix, and any entries in the o locations are stored in the 1952 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1953 stored simply in the MATSEQBAIJ format for compressed row storage. 1954 1955 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1956 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1957 In general, for PDE problems in which most nonzeros are near the diagonal, 1958 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1959 or you will get TERRIBLE performance; see the users' manual chapter on 1960 matrices. 
1961 1962 Level: intermediate 1963 1964 .keywords: matrix, block, aij, compressed row, sparse, parallel 1965 1966 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1967 @*/ 1968 1969 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1970 { 1971 int ierr,size; 1972 1973 PetscFunctionBegin; 1974 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1975 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1976 if (size > 1) { 1977 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1978 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1979 } else { 1980 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1981 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1982 } 1983 PetscFunctionReturn(0); 1984 } 1985 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 1989 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1990 { 1991 Mat mat; 1992 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1993 int ierr,len=0; 1994 1995 PetscFunctionBegin; 1996 *newmat = 0; 1997 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1998 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1999 mat->preallocated = PETSC_TRUE; 2000 a = (Mat_MPISBAIJ*)mat->data; 2001 a->bs = oldmat->bs; 2002 a->bs2 = oldmat->bs2; 2003 a->mbs = oldmat->mbs; 2004 a->nbs = oldmat->nbs; 2005 a->Mbs = oldmat->Mbs; 2006 a->Nbs = oldmat->Nbs; 2007 2008 a->rstart = oldmat->rstart; 2009 a->rend = oldmat->rend; 2010 a->cstart = oldmat->cstart; 2011 a->cend = oldmat->cend; 2012 a->size = oldmat->size; 2013 a->rank = oldmat->rank; 2014 a->donotstash = oldmat->donotstash; 2015 a->roworiented = oldmat->roworiented; 2016 a->rowindices = 0; 2017 a->rowvalues = 0; 2018 a->getrowactive = PETSC_FALSE; 2019 a->barray = 0; 2020 a->rstart_bs = oldmat->rstart_bs; 2021 a->rend_bs = oldmat->rend_bs; 2022 a->cstart_bs = 
oldmat->cstart_bs; 2023 a->cend_bs = oldmat->cend_bs; 2024 2025 /* hash table stuff */ 2026 a->ht = 0; 2027 a->hd = 0; 2028 a->ht_size = 0; 2029 a->ht_flag = oldmat->ht_flag; 2030 a->ht_fact = oldmat->ht_fact; 2031 a->ht_total_ct = 0; 2032 a->ht_insert_ct = 0; 2033 2034 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2035 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2036 a->cowners = a->rowners + a->size + 2; 2037 a->rowners_bs = a->cowners + a->size + 2; 2038 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2039 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2040 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2041 if (oldmat->colmap) { 2042 #if defined (PETSC_USE_CTABLE) 2043 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2044 #else 2045 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2046 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2047 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2048 #endif 2049 } else a->colmap = 0; 2050 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2051 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2052 PetscLogObjectMemory(mat,len*sizeof(int)); 2053 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2054 } else a->garray = 0; 2055 2056 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2057 PetscLogObjectParent(mat,a->lvec); 2058 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2059 2060 PetscLogObjectParent(mat,a->Mvctx); 2061 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2062 PetscLogObjectParent(mat,a->A); 2063 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2064 PetscLogObjectParent(mat,a->B); 2065 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2066 
*newmat = mat; 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #include "petscsys.h" 2071 2072 EXTERN_C_BEGIN 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatLoad_MPISBAIJ" 2075 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2076 { 2077 Mat A; 2078 int i,nz,ierr,j,rstart,rend,fd; 2079 PetscScalar *vals,*buf; 2080 MPI_Comm comm = ((PetscObject)viewer)->comm; 2081 MPI_Status status; 2082 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2083 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2084 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2085 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2086 int dcount,kmax,k,nzcount,tmp; 2087 2088 PetscFunctionBegin; 2089 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2090 2091 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2092 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2093 if (!rank) { 2094 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2095 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2096 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2097 if (header[3] < 0) { 2098 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2099 } 2100 } 2101 2102 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2103 M = header[1]; N = header[2]; 2104 2105 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2106 2107 /* 2108 This code adds extra rows to make sure the number of rows is 2109 divisible by the blocksize 2110 */ 2111 Mbs = M/bs; 2112 extra_rows = bs - M + bs*(Mbs); 2113 if (extra_rows == bs) extra_rows = 0; 2114 else Mbs++; 2115 if (extra_rows &&!rank) { 2116 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2117 } 2118 2119 /* determine ownership of all rows */ 2120 mbs = Mbs/size + ((Mbs % size) > rank); 2121 m = mbs*bs; 
  /* rowners (block-row ownership) and browners (point-row ownership) share one allocation */
  ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];   /* prefix sum -> ranges */
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* padding rows get a single (diagonal) entry each */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* on one process the padding entries are not in the file; synthesize them */
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      /* append the synthesized diagonal entries for the padding rows */
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens = dlens + (rend-rstart);
  /* mask/masked1/masked2 share one zeroed allocation of 3*Mbs ints */
  ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padding rows get 1.0 on the diagonal so the matrix stays nonsingular */
    if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* NOTE(review): the loop above leaves i == size-1, so procsnz[i] here is
         procsnz[size-1]; correct, but fragile — the column-index pass spells
         out procsnz[size-1] explicitly */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
. 
fact - factor 2314 2315 Collective on Mat 2316 2317 Level: advanced 2318 2319 Notes: 2320 This can also be set by the command line option: -mat_use_hash_table fact 2321 2322 .keywords: matrix, hashtable, factor, HT 2323 2324 .seealso: MatSetOption() 2325 @*/ 2326 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2327 { 2328 PetscFunctionBegin; 2329 SETERRQ(1,"Function not yet written for SBAIJ format"); 2330 /* PetscFunctionReturn(0); */ 2331 } 2332 2333 #undef __FUNCT__ 2334 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2335 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2336 { 2337 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2338 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2339 PetscReal atmp; 2340 PetscReal *work,*svalues,*rvalues; 2341 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2342 int rank,size,*rowners_bs,dest,count,source; 2343 PetscScalar *va; 2344 MatScalar *ba; 2345 MPI_Status stat; 2346 2347 PetscFunctionBegin; 2348 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2349 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2350 2351 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 2352 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 2353 2354 bs = a->bs; 2355 mbs = a->mbs; 2356 Mbs = a->Mbs; 2357 ba = b->a; 2358 bi = b->i; 2359 bj = b->j; 2360 /* 2361 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2362 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2363 */ 2364 2365 /* find ownerships */ 2366 rowners_bs = a->rowners_bs; 2367 /* 2368 if (!rank){ 2369 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2370 } 2371 */ 2372 2373 /* each proc creates an array to be distributed */ 2374 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2375 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2376 2377 /* row_max for B */ 2378 if (rank != size-1){ 2379 for (i=0; i<mbs; i++) { 2380 ncols = bi[1] - bi[0]; bi++; 2381 brow = 
bs*i; 2382 for (j=0; j<ncols; j++){ 2383 bcol = bs*(*bj); 2384 for (kcol=0; kcol<bs; kcol++){ 2385 col = bcol + kcol; /* local col index */ 2386 col += rowners_bs[rank+1]; /* global col index */ 2387 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2388 for (krow=0; krow<bs; krow++){ 2389 atmp = PetscAbsScalar(*ba); ba++; 2390 row = brow + krow; /* local row index */ 2391 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2392 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2393 if (work[col] < atmp) work[col] = atmp; 2394 } 2395 } 2396 bj++; 2397 } 2398 } 2399 /* 2400 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2401 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2402 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2403 */ 2404 2405 /* send values to its owners */ 2406 for (dest=rank+1; dest<size; dest++){ 2407 svalues = work + rowners_bs[dest]; 2408 count = rowners_bs[dest+1]-rowners_bs[dest]; 2409 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr); 2410 /* 2411 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2412 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2413 */ 2414 } 2415 } 2416 2417 /* receive values */ 2418 if (rank){ 2419 rvalues = work; 2420 count = rowners_bs[rank+1]-rowners_bs[rank]; 2421 for (source=0; source<rank; source++){ 2422 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr); 2423 /* process values */ 2424 for (i=0; i<count; i++){ 2425 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2426 } 2427 /* 2428 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2429 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2430 */ 2431 } 2432 } 2433 2434 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2435 ierr = 
PetscFree(work);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatRelax_MPISBAIJ" 2441 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2442 { 2443 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2444 int ierr,mbs=mat->mbs,bs=mat->bs; 2445 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2446 Vec bb1; 2447 2448 PetscFunctionBegin; 2449 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2450 if (bs > 1) 2451 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2452 2453 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2454 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2455 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2456 its--; 2457 } 2458 2459 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2460 while (its--){ 2461 2462 /* lower triangular part: slvec0b = - B^T*xx */ 2463 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2464 2465 /* copy xx into slvec0a */ 2466 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2468 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2469 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2470 2471 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2472 2473 /* copy bb into slvec1a */ 2474 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2475 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2476 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2477 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2478 2479 /* set slvec1b = 0 */ 2480 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2481 2482 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2484 ierr = 
VecRestoreArray(bb,&b);CHKERRQ(ierr); 2485 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2486 2487 /* upper triangular part: bb1 = bb1 - B*x */ 2488 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2489 2490 /* local diagonal sweep */ 2491 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2492 } 2493 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2494 } else { 2495 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2496 } 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2502 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2503 { 2504 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2505 int ierr; 2506 PetscScalar mone=-1.0; 2507 Vec lvec1,bb1; 2508 2509 PetscFunctionBegin; 2510 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2511 if (mat->bs > 1) 2512 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2513 2514 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2515 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2516 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2517 its--; 2518 } 2519 2520 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2521 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2522 while (its--){ 2523 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2524 2525 /* lower diagonal part: bb1 = bb - B^T*xx */ 2526 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2527 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2528 2529 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2530 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 
2531 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2532 2533 /* upper diagonal part: bb1 = bb1 - B*x */ 2534 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2535 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2536 2537 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2538 2539 /* diagonal sweep */ 2540 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2541 } 2542 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2543 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2544 } else { 2545 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2546 } 2547 PetscFunctionReturn(0); 2548 } 2549 2550