1 /*$Id: mpisbaij.c,v 1.16 2000/08/30 20:48:25 hzhang Exp hzhang $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19 20 /* UGLY, ugly, ugly 21 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 22 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 23 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 24 converts the entries into single precision and then calls ..._MatScalar() to put them 25 into the single precision data structures. 26 */ 27 #if defined(PETSC_USE_MAT_SINGLE) 28 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 #else 34 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 35 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 36 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 37 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 38 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 39 #endif 40 41 EXTERN_C_BEGIN 42 #undef __FUNC__ 43 #define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ" 44 int MatStoreValues_MPISBAIJ(Mat mat) 45 { 46 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 47 int ierr; 48 49 PetscFunctionBegin; 50 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 51 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 EXTERN_C_END 55 56 EXTERN_C_BEGIN 57 #undef __FUNC__ 58 #define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ" 59 int MatRetrieveValues_MPISBAIJ(Mat mat) 60 { 61 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 62 int ierr; 63 64 PetscFunctionBegin; 65 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 66 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* 72 Local utility routine that creates a mapping from the global column 73 number to the local number in the off-diagonal part of the local 74 storage of the matrix. This is done in a non scable way since the 75 length of colmap equals the global matrix length. 76 */ 77 #undef __FUNC__ 78 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 79 static int CreateColmap_MPISBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 int nbs = B->nbs,i,bs=B->bs,ierr; 84 85 PetscFunctionBegin; 86 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 87 PetscFunctionReturn(0); 88 } 89 90 #define CHUNKSIZE 10 91 92 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 93 { \ 94 \ 95 brow = row/bs; \ 96 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 97 rmax = aimax[brow]; nrow = ailen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto a_noinsert; \ 113 } \ 114 } \ 115 if (a->nonew == 1) goto a_noinsert; \ 116 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 117 if (nrow >= rmax) { \ 118 /* there is no extra room in row, therefore enlarge */ \ 119 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 120 MatScalar *new_a; \ 121 \ 122 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 123 \ 124 /* malloc new storage space */ \ 125 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 126 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 127 new_j = (int*)(new_a + bs2*new_nz); \ 128 new_i = new_j + new_nz; \ 129 \ 130 /* copy over old data into new slots */ \ 131 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 132 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 133 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 134 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 135 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 137 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 138 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 139 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 140 /* free up old matrix storage */ \ 141 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 142 if (!a->singlemalloc) { \ 143 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 144 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 145 } \ 146 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 147 a->singlemalloc = PETSC_TRUE; \ 148 \ 149 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 150 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 151 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 152 a->s_maxnz += bs2*CHUNKSIZE; \ 153 a->reallocs++; \ 154 a->s_nz++; \ 155 } \ 156 N = nrow++ - 1; \ 157 /* shift up all the later entries in this row */ \ 158 for (ii=N; ii>=_i; ii--) { \ 159 rp[ii+1] = rp[ii]; \ 160 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 161 } \ 162 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 163 rp[_i] = bcol; \ 164 ap[bs2*_i + bs*cidx + ridx] = value; \ 165 a_noinsert:; \ 166 ailen[brow] = nrow; \ 167 } 168 #ifndef MatSetValues_SeqBAIJ_B_Private 169 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 170 { \ 171 brow = row/bs; \ 172 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173 rmax = bimax[brow]; nrow = bilen[brow]; \ 174 bcol = col/bs; \ 175 ridx = row % bs; cidx = col % bs; \ 176 low = 0; high = nrow; \ 177 while (high-low > 3) { \ 178 t = (low+high)/2; \ 179 if (rp[t] > bcol) high = t; \ 180 else low = t; \ 181 } \ 182 for (_i=low; _i<high; _i++) { \ 183 if (rp[_i] > bcol) break; \ 184 if (rp[_i] == bcol) { \ 185 bap = ap + bs2*_i + bs*cidx + ridx; \ 186 if (addv == ADD_VALUES) *bap += value; \ 187 else *bap = value; \ 188 goto b_noinsert; \ 189 } \ 190 } \ 191 if (b->nonew == 1) goto b_noinsert; \ 192 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 193 if (nrow >= rmax) { \ 194 /* there is no extra room in row, therefore enlarge */ \ 195 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 196 MatScalar *new_a; \ 197 \ 198 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 199 \ 200 /* malloc new storage space */ \ 201 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 202 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 203 new_j = (int*)(new_a + bs2*new_nz); \ 204 new_i = new_j + new_nz; \ 205 \ 206 /* copy over old data into new slots */ \ 207 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 208 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 209 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 210 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 211 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 213 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 214 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 215 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 216 /* free up old matrix storage */ \ 217 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 218 if (!b->singlemalloc) { \ 219 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 220 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 221 } \ 222 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 223 b->singlemalloc = PETSC_TRUE; \ 224 \ 225 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 226 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 227 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 228 b->maxnz += bs2*CHUNKSIZE; \ 229 b->reallocs++; \ 230 b->nz++; \ 231 } \ 232 N = nrow++ - 1; \ 233 /* shift up all the later entries in this row */ \ 234 for (ii=N; ii>=_i; ii--) { \ 235 rp[ii+1] = rp[ii]; \ 236 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 237 } \ 238 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 239 rp[_i] = bcol; \ 240 ap[bs2*_i + bs*cidx + ridx] = value; \ 241 b_noinsert:; \ 242 bilen[brow] = nrow; \ 243 } 244 #endif 245 246 #if defined(PETSC_USE_MAT_SINGLE) 247 #undef __FUNC__ 248 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 250 { 251 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 252 int ierr,i,N = m*n; 253 MatScalar *vsingle; 254 255 PetscFunctionBegin; 256 if (N > b->setvalueslen) { 257 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 258 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 259 b->setvalueslen = N; 260 } 261 vsingle = b->setvaluescopy; 262 263 for (i=0; i<N; i++) { 264 vsingle[i] = v[i]; 265 } 266 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNC__ 271 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 273 { 274 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 275 int ierr,i,N = m*n*b->bs2; 276 MatScalar *vsingle; 277 278 PetscFunctionBegin; 279 if (N > b->setvalueslen) { 280 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 281 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 282 b->setvalueslen = N; 283 } 284 vsingle = b->setvaluescopy; 285 for (i=0; i<N; i++) { 286 vsingle[i] = v[i]; 287 } 288 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNC__ 293 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 295 { 296 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 297 int ierr,i,N = m*n; 298 MatScalar *vsingle; 299 300 PetscFunctionBegin; 301 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNC__ 306 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 308 { 309 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 310 int ierr,i,N = m*n*b->bs2; 311 MatScalar *vsingle; 312 313 PetscFunctionBegin; 314 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 315 PetscFunctionReturn(0); 316 } 317 #endif 318 319 /* Only add/insert a(i,j) with i<=j (blocks). 320 Any a(i,j) with i>j input by user is ingored. 321 */ 322 #undef __FUNC__ 323 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 325 { 326 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 327 MatScalar value; 328 int ierr,i,j,row,col; 329 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 330 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 331 int cend_orig=baij->cend_bs,bs=baij->bs; 332 333 /* Some Variables required in the macro */ 334 Mat A = baij->A; 335 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 336 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 337 MatScalar *aa=a->a; 338 339 Mat B = baij->B; 340 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 341 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 342 MatScalar *ba=b->a; 343 344 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 345 int low,high,t,ridx,cidx,bs2=a->bs2; 346 MatScalar *ap,*bap; 347 348 /* for stash */ 349 int n_loc, *in_loc=0; 350 MatScalar *v_loc=0; 351 352 PetscFunctionBegin; 353 354 if(!baij->donotstash){ 355 in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc); 356 v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc); 357 } 358 359 for (i=0; i<m; i++) { 360 if (im[i] < 0) continue; 361 #if defined(PETSC_USE_BOPT_g) 362 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 363 #endif 364 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 365 row = im[i] - rstart_orig; /* local row index */ 366 for (j=0; j<n; j++) { 367 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 368 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 369 col = in[j] - cstart_orig; /* local col index */ 370 brow = row/bs; bcol = col/bs; 371 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 372 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 373 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 374 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 375 } else if (in[j] < 0) continue; 376 #if defined(PETSC_USE_BOPT_g) 377 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 378 #endif 379 else { /* off-diag entry (B) */ 380 if (mat->was_assembled) { 381 if (!baij->colmap) { 382 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 383 } 384 #if defined (PETSC_USE_CTABLE) 385 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 386 col = col - 1 + in[j]%bs; 387 #else 388 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 389 #endif 390 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 391 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 392 col = in[j]; 393 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 394 B = baij->B; 395 b = (Mat_SeqBAIJ*)(B)->data; 396 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 397 ba=b->a; 398 } 399 } else col = in[j]; 400 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 401 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 402 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 403 } 404 } 405 } else { /* off processor entry */ 406 if (!baij->donotstash) { 407 n_loc = 0; 408 for (j=0; j<n; j++){ 409 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 410 in_loc[n_loc] = in[j]; 411 if (roworiented) { 412 v_loc[n_loc] = v[i*n+j]; 413 } else { 414 v_loc[n_loc] = v[j*m+i]; 415 } 416 n_loc++; 417 } 418 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 419 } 420 } 421 } 422 423 if(!baij->donotstash){ 424 ierr = PetscFree(in_loc);CHKERRQ(ierr); 425 ierr = PetscFree(v_loc);CHKERRQ(ierr); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNC__ 431 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 432 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 433 { 434 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 435 MatScalar *value,*barray=baij->barray; 436 int ierr,i,j,ii,jj,row,col; 437 int roworiented = baij->roworiented,rstart=baij->rstart ; 438 int rend=baij->rend,cstart=baij->cstart,stepval; 439 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 440 441 PetscFunctionBegin; 442 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 443 PetscFunctionReturn(0); 444 } 445 446 #define HASH_KEY 0.6180339887 447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 448 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 449 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 450 #undef __FUNC__ 451 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar" 452 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 453 { 454 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 455 int ierr,i,j,row,col; 456 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 457 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 458 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 459 PetscReal tmp; 460 MatScalar **HD = baij->hd,value; 461 #if defined(PETSC_USE_BOPT_g) 462 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 463 #endif 464 465 PetscFunctionBegin; 466 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNC__ 471 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 472 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 473 { 474 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 475 int ierr,i,j,ii,jj,row,col; 476 int roworiented = baij->roworiented,rstart=baij->rstart ; 477 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 478 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 479 PetscReal tmp; 480 MatScalar **HD = baij->hd,*baij_a; 481 MatScalar *v_t,*value; 482 #if defined(PETSC_USE_BOPT_g) 483 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 484 #endif 485 486 PetscFunctionBegin; 487 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNC__ 492 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 493 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 494 { 495 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 496 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 497 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 498 499 PetscFunctionBegin; 500 for (i=0; i<m; i++) { 501 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 502 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 503 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 504 row = idxm[i] - bsrstart; 505 for (j=0; j<n; j++) { 506 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 507 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 508 if (idxn[j] >= bscstart && idxn[j] < bscend){ 509 col = idxn[j] - bscstart; 510 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 511 } else { 512 if (!baij->colmap) { 513 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 514 } 515 #if defined (PETSC_USE_CTABLE) 516 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 517 data --; 518 #else 519 data = baij->colmap[idxn[j]/bs]-1; 520 #endif 521 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 522 else { 523 col = data + idxn[j]%bs; 524 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 525 } 526 } 527 } 528 } else { 529 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 530 } 531 } 532 PetscFunctionReturn(0); 533 } 534 535 #undef __FUNC__ 536 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 537 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 538 { 539 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 540 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 541 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 542 int ierr; 543 PetscReal sum[2],*lnorm2; 544 545 PetscFunctionBegin; 546 if (baij->size == 1) { 547 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 548 } else { 549 if (type == NORM_FROBENIUS) { 550 lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2); 551 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 552 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 553 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 554 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 555 /* 556 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 557 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 558 */ 559 ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 560 /* 561 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 562 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 563 564 *norm = sqrt(sum[0] + 2*sum[1]); 565 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 566 } else { 567 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 /* 574 Creates the hash table, and sets the table 575 This table is created only once. 576 If new entried need to be added to the matrix 577 then the hash table has to be destroyed and 578 recreated. 579 */ 580 #undef __FUNC__ 581 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 582 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 583 { 584 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 585 Mat A = baij->A,B=baij->B; 586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 587 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 588 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 589 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 590 int *HT,key; 591 MatScalar **HD; 592 PetscReal tmp; 593 #if defined(PETSC_USE_BOPT_g) 594 int ct=0,max=0; 595 #endif 596 597 PetscFunctionBegin; 598 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 599 PetscFunctionReturn(0); 600 } 601 602 #undef __FUNC__ 603 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 604 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 605 { 606 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 607 int ierr,nstash,reallocs; 608 InsertMode addv; 609 610 PetscFunctionBegin; 611 if (baij->donotstash) { 612 PetscFunctionReturn(0); 613 } 614 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) { 618 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 619 } 620 mat->insertmode = addv; /* in case this processor had no cache */ 621 622 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 623 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 624 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 625 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNC__ 632 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 633 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 636 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 637 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 638 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 639 int *row,*col,other_disassembled; 640 PetscTruth r1,r2,r3; 641 MatScalar *val; 642 InsertMode addv = mat->insertmode; 643 int rank; 644 645 PetscFunctionBegin; 646 /* remove 2 line below later */ 647 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 648 649 if (!baij->donotstash) { 650 while (1) { 651 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 652 /* 653 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 654 PetscSynchronizedFlush(PETSC_COMM_WORLD); 655 */ 656 if (!flg) break; 657 658 for (i=0; i<n;) { 659 /* Now identify the consecutive vals belonging to the same row */ 660 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 661 if (j < n) ncols = j-i; 662 else ncols = n-i; 663 /* Now assemble all these values with a single function call */ 664 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 665 i = j; 666 } 667 } 668 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 669 /* Now process the block-stash. Since the values are stashed column-oriented, 670 set the roworiented flag to column oriented, and after MatSetValues() 671 restore the original flags */ 672 r1 = baij->roworiented; 673 r2 = a->roworiented; 674 r3 = b->roworiented; 675 baij->roworiented = PETSC_FALSE; 676 a->roworiented = PETSC_FALSE; 677 b->roworiented = PETSC_FALSE; 678 while (1) { 679 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 680 if (!flg) break; 681 682 for (i=0; i<n;) { 683 /* Now identify the consecutive vals belonging to the same row */ 684 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 685 if (j < n) ncols = j-i; 686 else ncols = n-i; 687 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 688 i = j; 689 } 690 } 691 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 692 baij->roworiented = r1; 693 a->roworiented = r2; 694 b->roworiented = r3; 695 } 696 697 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 698 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 699 700 /* determine if any processor has disassembled, if so we must 701 also disassemble ourselfs, in order that we may reassemble. */ 702 /* 703 if nonzero structure of submatrix B cannot change then we know that 704 no processor disassembled thus we can skip this stuff 705 */ 706 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 707 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 708 if (mat->was_assembled && !other_disassembled) { 709 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 710 } 711 } 712 713 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 714 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); 715 } 716 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 717 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 718 719 #if defined(PETSC_USE_BOPT_g) 720 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 721 PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 722 baij->ht_total_ct = 0; 723 baij->ht_insert_ct = 0; 724 } 725 #endif 726 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 727 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 728 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 729 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 730 } 731 732 if (baij->rowvalues) { 733 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 734 baij->rowvalues = 0; 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNC__ 740 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket" 741 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 742 { 743 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 744 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 745 PetscTruth isascii,isdraw; 746 Viewer sviewer; 747 748 PetscFunctionBegin; 749 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 750 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 751 if (isascii) { 752 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 753 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 754 MatInfo info; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 757 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 758 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 759 baij->bs,(int)info.memory);CHKERRQ(ierr); 760 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 761 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 762 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 763 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 764 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 765 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 766 PetscFunctionReturn(0); 767 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 768 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 } 772 773 if (isdraw) { 774 Draw draw; 775 PetscTruth isnull; 776 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 777 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 778 } 779 780 if (size == 1) { 781 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 782 } else { 783 /* assemble the entire matrix onto first processor. */ 784 Mat A; 785 Mat_SeqSBAIJ *Aloc; 786 Mat_SeqBAIJ *Bloc; 787 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 788 MatScalar *a; 789 790 if (!rank) { 791 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } else { 793 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 794 } 795 PLogObjectParent(mat,A); 796 797 /* copy over the A part */ 798 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 799 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 800 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 801 802 for (i=0; i<mbs; i++) { 803 rvals[0] = bs*(baij->rstart + i); 804 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 805 for (j=ai[i]; j<ai[i+1]; j++) { 806 col = (baij->cstart+aj[j])*bs; 807 for (k=0; k<bs; k++) { 808 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 809 col++; a += bs; 810 } 811 } 812 } 813 /* copy over the B part */ 814 Bloc = (Mat_SeqBAIJ*)baij->B->data; 815 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 816 for (i=0; i<mbs; i++) { 817 rvals[0] = bs*(baij->rstart + i); 818 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 819 for (j=ai[i]; j<ai[i+1]; j++) { 820 col = baij->garray[aj[j]]*bs; 821 for (k=0; k<bs; k++) { 822 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 823 col++; a += bs; 824 } 825 } 826 } 827 ierr = PetscFree(rvals);CHKERRQ(ierr); 828 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 /* 831 Everyone has to call to draw the matrix since the graphics waits are 832 synchronized across all processors that share the Draw object 833 */ 834 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 835 if (!rank) { 836 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 837 } 838 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 839 ierr = MatDestroy(A);CHKERRQ(ierr); 840 } 841 PetscFunctionReturn(0); 842 } 843 844 #undef __FUNC__ 845 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ" 846 int MatView_MPISBAIJ(Mat mat,Viewer viewer) 847 { 848 int ierr; 849 PetscTruth isascii,isdraw,issocket,isbinary; 850 851 PetscFunctionBegin; 852 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 853 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 854 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 855 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 856 if (isascii || isdraw || issocket || isbinary) { 857 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 858 } else { 859 SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNC__ 865 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ" 866 int MatDestroy_MPISBAIJ(Mat mat) 867 { 868 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 869 int ierr; 870 871 PetscFunctionBegin; 872 873 if (mat->mapping) { 874 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 875 } 876 if (mat->bmapping) { 877 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 878 } 879 if (mat->rmap) { 880 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 881 } 882 if (mat->cmap) { 883 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 884 } 885 #if defined(PETSC_USE_LOG) 886 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 887 #endif 888 889 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 890 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 891 892 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 893 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 894 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 895 #if defined (PETSC_USE_CTABLE) 896 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 897 #else 898 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 899 #endif 900 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 901 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 902 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 903 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 904 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 905 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 906 #if defined(PETSC_USE_MAT_SINGLE) 907 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 908 #endif 909 ierr = PetscFree(baij);CHKERRQ(ierr); 910 PLogObjectDestroy(mat); 911 PetscHeaderDestroy(mat); 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNC__ 916 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 917 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 918 { 919 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 920 int ierr,nt; 921 922 PetscFunctionBegin; 923 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 924 if (nt != a->n) { 925 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 926 } 927 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 928 if (nt != a->m) { 929 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 930 } 931 932 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 933 /* do diagonal part */ 934 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 935 /* do supperdiagonal part */ 936 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 937 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 938 /* do subdiagonal part */ 939 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 940 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 941 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 942 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNC__ 947 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 948 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 949 { 950 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 951 int ierr; 952 953 PetscFunctionBegin; 954 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 955 /* do diagonal part */ 956 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 957 /* do supperdiagonal part */ 958 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 959 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 960 961 /* do subdiagonal part */ 962 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 963 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 965 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNC__ 970 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 971 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 972 { 973 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 974 int ierr; 975 976 PetscFunctionBegin; 977 SETERRQ(1,1,"Matrix is symmetric. Call MatMult()."); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNC__ 982 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 983 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 986 int ierr; 987 988 PetscFunctionBegin; 989 SETERRQ(1,1,"Matrix is symmetric. Call MatMultAdd()."); 990 PetscFunctionReturn(0); 991 } 992 993 /* 994 This only works correctly for square matrices where the subblock A->A is the 995 diagonal block 996 */ 997 #undef __FUNC__ 998 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 999 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1000 { 1001 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1002 int ierr; 1003 1004 PetscFunctionBegin; 1005 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */ 1006 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNC__ 1011 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 1012 int MatScale_MPISBAIJ(Scalar *aa,Mat A) 1013 { 1014 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1015 int ierr; 1016 1017 PetscFunctionBegin; 1018 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1019 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNC__ 1024 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ" 1025 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n) 1026 { 1027 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1028 1029 PetscFunctionBegin; 1030 if (m) *m = mat->M; 1031 if (n) *n = mat->N; 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #undef __FUNC__ 1036 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ" 1037 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n) 1038 { 1039 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1040 1041 PetscFunctionBegin; 1042 *m = mat->m; *n = mat->n; 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNC__ 1047 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 1048 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 1049 { 1050 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1051 1052 PetscFunctionBegin; 1053 if (m) *m = mat->rstart*mat->bs; 1054 if (n) *n = mat->rend*mat->bs; 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNC__ 1059 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 1060 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1061 { 1062 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1063 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1064 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1065 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1066 int *cmap,*idx_p,cstart = mat->cstart; 1067 1068 PetscFunctionBegin; 1069 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1070 mat->getrowactive = PETSC_TRUE; 1071 1072 if (!mat->rowvalues && (idx || v)) { 1073 /* 1074 allocate enough space to hold information from the longest row. 1075 */ 1076 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1077 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1078 int max = 1,mbs = mat->mbs,tmp; 1079 for (i=0; i<mbs; i++) { 1080 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1081 if (max < tmp) { max = tmp; } 1082 } 1083 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1084 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1085 } 1086 1087 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1088 lrow = row - brstart; /* local row index */ 1089 1090 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1091 if (!v) {pvA = 0; pvB = 0;} 1092 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1093 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1094 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1095 nztot = nzA + nzB; 1096 1097 cmap = mat->garray; 1098 if (v || idx) { 1099 if (nztot) { 1100 /* Sort by increasing column numbers, assuming A and B already sorted */ 1101 int imark = -1; 1102 if (v) { 1103 *v = v_p = mat->rowvalues; 1104 for (i=0; i<nzB; i++) { 1105 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1106 else break; 1107 } 1108 imark = i; 1109 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1110 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1111 } 1112 if (idx) { 1113 *idx = idx_p = mat->rowindices; 1114 if (imark > -1) { 1115 for (i=0; i<imark; i++) { 1116 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1117 } 1118 } else { 1119 for (i=0; i<nzB; i++) { 1120 if (cmap[cworkB[i]/bs] < cstart) 1121 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1122 else break; 1123 } 1124 imark = i; 1125 } 1126 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1127 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1128 } 1129 } else { 1130 if (idx) *idx = 0; 1131 if (v) *v = 0; 1132 } 1133 } 1134 *nz = nztot; 1135 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1136 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNC__ 1141 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1142 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1143 { 1144 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1145 1146 PetscFunctionBegin; 1147 if (baij->getrowactive == PETSC_FALSE) { 1148 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1149 } 1150 baij->getrowactive = PETSC_FALSE; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNC__ 1155 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1156 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1157 { 1158 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1159 1160 PetscFunctionBegin; 1161 *bs = baij->bs; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNC__ 1166 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1167 int MatZeroEntries_MPISBAIJ(Mat A) 1168 { 1169 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1170 int ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1174 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ" 1180 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1181 { 1182 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1183 Mat A = a->A,B = a->B; 1184 int ierr; 1185 PetscReal isend[5],irecv[5]; 1186 1187 PetscFunctionBegin; 1188 info->block_size = (double)a->bs; 1189 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1190 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1191 isend[3] = info->memory; isend[4] = info->mallocs; 1192 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1193 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1194 isend[3] += info->memory; isend[4] += info->mallocs; 1195 if (flag == MAT_LOCAL) { 1196 info->nz_used = isend[0]; 1197 info->nz_allocated = isend[1]; 1198 info->nz_unneeded = isend[2]; 1199 info->memory = isend[3]; 1200 info->mallocs = isend[4]; 1201 } else if (flag == MAT_GLOBAL_MAX) { 1202 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1203 info->nz_used = irecv[0]; 1204 info->nz_allocated = irecv[1]; 1205 info->nz_unneeded = irecv[2]; 1206 info->memory = irecv[3]; 1207 info->mallocs = irecv[4]; 1208 } else if (flag == MAT_GLOBAL_SUM) { 1209 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1210 info->nz_used = irecv[0]; 1211 info->nz_allocated = irecv[1]; 1212 info->nz_unneeded = irecv[2]; 1213 info->memory = irecv[3]; 1214 info->mallocs = irecv[4]; 1215 } else { 1216 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1217 } 1218 info->rows_global = (double)a->M; 1219 info->columns_global = (double)a->N; 1220 info->rows_local = (double)a->m; 1221 info->columns_local = (double)a->N; 1222 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1223 info->fill_ratio_needed = 0; 1224 info->factor_mallocs = 0; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNC__ 1229 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1230 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1231 { 1232 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1233 int ierr; 1234 1235 PetscFunctionBegin; 1236 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1237 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1238 op == MAT_COLUMNS_UNSORTED || 1239 op == MAT_COLUMNS_SORTED || 1240 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1241 op == MAT_KEEP_ZEROED_ROWS || 1242 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1243 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1244 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1245 } else if (op == MAT_ROW_ORIENTED) { 1246 a->roworiented = PETSC_TRUE; 1247 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1248 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1249 } else if (op == MAT_ROWS_SORTED || 1250 op == MAT_ROWS_UNSORTED || 1251 op == MAT_SYMMETRIC || 1252 op == MAT_STRUCTURALLY_SYMMETRIC || 1253 op == MAT_YES_NEW_DIAGONALS || 1254 op == MAT_USE_HASH_TABLE) { 1255 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1256 } else if (op == MAT_COLUMN_ORIENTED) { 1257 a->roworiented = PETSC_FALSE; 1258 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1259 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1260 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1261 a->donotstash = PETSC_TRUE; 1262 } else if (op == MAT_NO_NEW_DIAGONALS) { 1263 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1264 } else if (op == MAT_USE_HASH_TABLE) { 1265 a->ht_flag = PETSC_TRUE; 1266 } else { 1267 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1268 } 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNC__ 1273 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1274 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1275 { 1276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1277 Mat_SeqBAIJ *Aloc; 1278 Mat B; 1279 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1280 int bs=baij->bs,mbs=baij->mbs; 1281 MatScalar *a; 1282 1283 PetscFunctionBegin; 1284 SETERRQ(1,1,"Matrix is symmetric. MatTranspose() should not be called"); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNC__ 1289 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1290 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1291 { 1292 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1293 Mat a = baij->A,b = baij->B; 1294 int ierr,s1,s2,s3; 1295 1296 PetscFunctionBegin; 1297 if (ll != rr) { 1298 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1299 } 1300 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1301 if (rr) { 1302 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1303 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1304 /* Overlap communication with computation. */ 1305 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1306 /*} if (ll) { */ 1307 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1308 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1309 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1310 /* } */ 1311 /* scale the diagonal block */ 1312 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1313 1314 /* if (rr) { */ 1315 /* Do a scatter end and then right scale the off-diagonal block */ 1316 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1317 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1318 } 1319 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNC__ 1324 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1325 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1326 { 1327 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1328 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1329 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1330 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1331 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1332 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1333 MPI_Comm comm = A->comm; 1334 MPI_Request *send_waits,*recv_waits; 1335 MPI_Status recv_status,*send_status; 1336 IS istmp; 1337 1338 PetscFunctionBegin; 1339 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1340 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1341 1342 /* first count number of contributors to each processor */ 1343 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1344 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1345 procs = nprocs + size; 1346 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1347 for (i=0; i<N; i++) { 1348 idx = rows[i]; 1349 found = 0; 1350 for (j=0; j<size; j++) { 1351 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1352 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1353 } 1354 } 1355 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1356 } 1357 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1358 1359 /* inform other processors of number of messages and max length*/ 1360 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1361 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1362 nmax = work[rank]; 1363 nrecvs = work[size+rank]; 1364 ierr = PetscFree(work);CHKERRQ(ierr); 1365 1366 /* post receives: */ 1367 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1368 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1369 for (i=0; i<nrecvs; i++) { 1370 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1371 } 1372 1373 /* do sends: 1374 1) starts[i] gives the starting index in svalues for stuff going to 1375 the ith processor 1376 */ 1377 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1378 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1379 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1380 starts[0] = 0; 1381 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1382 for (i=0; i<N; i++) { 1383 svalues[starts[owner[i]]++] = rows[i]; 1384 } 1385 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1386 1387 starts[0] = 0; 1388 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1389 count = 0; 1390 for (i=0; i<size; i++) { 1391 if (procs[i]) { 1392 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1393 } 1394 } 1395 ierr = PetscFree(starts);CHKERRQ(ierr); 1396 1397 base = owners[rank]*bs; 1398 1399 /* wait on receives */ 1400 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1401 source = lens + nrecvs; 1402 count = nrecvs; slen = 0; 1403 while (count) { 1404 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1405 /* unpack receives into our local space */ 1406 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1407 source[imdex] = recv_status.MPI_SOURCE; 1408 lens[imdex] = n; 1409 slen += n; 1410 count--; 1411 } 1412 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1413 1414 /* move the data into the send scatter */ 1415 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1416 count = 0; 1417 for (i=0; i<nrecvs; i++) { 1418 values = rvalues + i*nmax; 1419 for (j=0; j<lens[i]; j++) { 1420 lrows[count++] = values[j] - base; 1421 } 1422 } 1423 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1424 ierr = PetscFree(lens);CHKERRQ(ierr); 1425 ierr = PetscFree(owner);CHKERRQ(ierr); 1426 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1427 1428 /* actually zap the local rows */ 1429 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1430 PLogObjectParent(A,istmp); 1431 1432 /* 1433 Zero the required rows. If the "diagonal block" of the matrix 1434 is square and the user wishes to set the diagonal we use seperate 1435 code so that MatSetValues() is not called for each diagonal allocating 1436 new memory, thus calling lots of mallocs and slowing things down. 1437 1438 Contributed by: Mathew Knepley 1439 */ 1440 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1441 ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1442 if (diag && (l->A->M == l->A->N)) { 1443 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1444 } else if (diag) { 1445 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1446 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1447 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1448 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1449 } 1450 for (i=0; i<slen; i++) { 1451 row = lrows[i] + rstart_bs; 1452 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1453 } 1454 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1456 } else { 1457 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1458 } 1459 1460 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1461 ierr = PetscFree(lrows);CHKERRQ(ierr); 1462 1463 /* wait on sends */ 1464 if (nsends) { 1465 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1466 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1467 ierr = PetscFree(send_status);CHKERRQ(ierr); 1468 } 1469 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1470 ierr = PetscFree(svalues);CHKERRQ(ierr); 1471 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNC__ 1476 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1477 int MatPrintHelp_MPISBAIJ(Mat A) 1478 { 1479 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1480 MPI_Comm comm = A->comm; 1481 static int called = 0; 1482 int ierr; 1483 1484 PetscFunctionBegin; 1485 if (!a->rank) { 1486 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1487 } 1488 if (called) {PetscFunctionReturn(0);} else called = 1; 1489 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1490 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNC__ 1495 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1496 int MatSetUnfactored_MPISBAIJ(Mat A) 1497 { 1498 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1499 int ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1507 1508 #undef __FUNC__ 1509 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1510 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1511 { 1512 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1513 Mat a,b,c,d; 1514 PetscTruth flg; 1515 int ierr; 1516 1517 PetscFunctionBegin; 1518 if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1519 a = matA->A; b = matA->B; 1520 c = matB->A; d = matB->B; 1521 1522 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1523 if (flg == PETSC_TRUE) { 1524 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1525 } 1526 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 /* -------------------------------------------------------------------*/ 1531 static struct _MatOps MatOps_Values = { 1532 MatSetValues_MPISBAIJ, 1533 MatGetRow_MPISBAIJ, 1534 MatRestoreRow_MPISBAIJ, 1535 MatMult_MPISBAIJ, 1536 MatMultAdd_MPISBAIJ, 1537 MatMultTranspose_MPISBAIJ, 1538 MatMultTransposeAdd_MPISBAIJ, 1539 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 MatTranspose_MPISBAIJ, 1547 MatGetInfo_MPISBAIJ, 1548 MatEqual_MPISBAIJ, 1549 MatGetDiagonal_MPISBAIJ, 1550 MatDiagonalScale_MPISBAIJ, 1551 MatNorm_MPISBAIJ, 1552 MatAssemblyBegin_MPISBAIJ, 1553 MatAssemblyEnd_MPISBAIJ, 1554 0, 1555 MatSetOption_MPISBAIJ, 1556 MatZeroEntries_MPISBAIJ, 1557 MatZeroRows_MPISBAIJ, 1558 0, 1559 0, 1560 0, 1561 0, 1562 MatGetSize_MPISBAIJ, 1563 MatGetLocalSize_MPISBAIJ, 1564 MatGetOwnershipRange_MPISBAIJ, 1565 0, 1566 0, 1567 0, 1568 0, 1569 MatDuplicate_MPISBAIJ, 1570 0, 1571 0, 1572 0, 1573 0, 1574 0, 1575 MatGetSubMatrices_MPISBAIJ, 1576 MatIncreaseOverlap_MPISBAIJ, 1577 MatGetValues_MPISBAIJ, 1578 0, 1579 MatPrintHelp_MPISBAIJ, 1580 MatScale_MPISBAIJ, 1581 0, 1582 0, 1583 0, 1584 MatGetBlockSize_MPISBAIJ, 1585 0, 1586 0, 1587 0, 1588 0, 1589 0, 1590 0, 1591 MatSetUnfactored_MPISBAIJ, 1592 0, 1593 MatSetValuesBlocked_MPISBAIJ, 1594 0, 1595 0, 1596 0, 1597 MatGetMaps_Petsc}; 1598 1599 1600 EXTERN_C_BEGIN 1601 #undef __FUNC__ 1602 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 1603 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1604 { 1605 PetscFunctionBegin; 1606 *a = ((Mat_MPISBAIJ *)A->data)->A; 1607 *iscopy = PETSC_FALSE; 1608 PetscFunctionReturn(0); 1609 } 1610 EXTERN_C_END 1611 1612 #undef __FUNC__ 1613 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 1614 /*@C 1615 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1616 (block compressed row). For good matrix assembly performance 1617 the user should preallocate the matrix storage by setting the parameters 1618 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1619 performance can be increased by more than a factor of 50. 1620 1621 Collective on MPI_Comm 1622 1623 Input Parameters: 1624 + comm - MPI communicator 1625 . bs - size of blockk 1626 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1627 This value should be the same as the local size used in creating the 1628 y vector for the matrix-vector product y = Ax. 1629 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1630 This value should be the same as the local size used in creating the 1631 x vector for the matrix-vector product y = Ax. 1632 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1633 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1634 . d_nz - number of block nonzeros per block row in diagonal portion of local 1635 submatrix (same for all local rows) 1636 . d_nnz - array containing the number of block nonzeros in the various block rows 1637 of the in diagonal portion of the local (possibly different for each block 1638 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1639 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1640 submatrix (same for all local rows). 1641 - o_nnz - array containing the number of nonzeros in the various block rows of the 1642 off-diagonal portion of the local submatrix (possibly different for 1643 each block row) or PETSC_NULL. 1644 1645 Output Parameter: 1646 . A - the matrix 1647 1648 Options Database Keys: 1649 . -mat_no_unroll - uses code that does not unroll the loops in the 1650 block calculations (much slower) 1651 . -mat_block_size - size of the blocks to use 1652 . -mat_mpi - use the parallel matrix data structures even on one processor 1653 (defaults to using SeqBAIJ format on one processor) 1654 1655 Notes: 1656 The user MUST specify either the local or global matrix dimensions 1657 (possibly both). 1658 1659 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1660 than it must be used on all processors that share the object for that argument. 1661 1662 Storage Information: 1663 For a square global matrix we define each processor's diagonal portion 1664 to be its local rows and the corresponding columns (a square submatrix); 1665 each processor's off-diagonal portion encompasses the remainder of the 1666 local matrix (a rectangular submatrix). 1667 1668 The user can specify preallocated storage for the diagonal part of 1669 the local submatrix with either d_nz or d_nnz (not both). Set 1670 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1671 memory allocation. Likewise, specify preallocated storage for the 1672 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1673 1674 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1675 the figure below we depict these three local rows and all columns (0-11). 1676 1677 .vb 1678 0 1 2 3 4 5 6 7 8 9 10 11 1679 ------------------- 1680 row 3 | o o o d d d o o o o o o 1681 row 4 | o o o d d d o o o o o o 1682 row 5 | o o o d d d o o o o o o 1683 ------------------- 1684 .ve 1685 1686 Thus, any entries in the d locations are stored in the d (diagonal) 1687 submatrix, and any entries in the o locations are stored in the 1688 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1689 stored simply in the MATSEQBAIJ format for compressed row storage. 1690 1691 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1692 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1693 In general, for PDE problems in which most nonzeros are near the diagonal, 1694 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1695 or you will get TERRIBLE performance; see the users' manual chapter on 1696 matrices. 1697 1698 Level: intermediate 1699 1700 .keywords: matrix, block, aij, compressed row, sparse, parallel 1701 1702 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1703 @*/ 1704 1705 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1706 { 1707 Mat B; 1708 Mat_MPISBAIJ *b; 1709 int ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size; 1710 PetscTruth flag1,flag2,flg; 1711 1712 PetscFunctionBegin; 1713 if (M != N || m != n){ /* N and n are not used after this */ 1714 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n"); 1715 } 1716 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1717 1718 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1719 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 1720 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 1721 if (d_nnz) { 1722 for (i=0; i<m/bs; i++) { 1723 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1724 } 1725 } 1726 if (o_nnz) { 1727 for (i=0; i<m/bs; i++) { 1728 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1729 } 1730 } 1731 1732 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1733 ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 1734 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1735 if (!flag1 && !flag2 && size == 1) { 1736 if (M == PETSC_DECIDE) M = m; 1737 ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 *A = 0; 1742 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 1743 PLogObjectCreate(B); 1744 B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 1745 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1746 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1747 1748 B->ops->destroy = MatDestroy_MPISBAIJ; 1749 B->ops->view = MatView_MPISBAIJ; 1750 B->mapping = 0; 1751 B->factor = 0; 1752 B->assembled = PETSC_FALSE; 1753 1754 B->insertmode = NOT_SET_VALUES; 1755 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1756 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 1757 1758 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 1759 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1760 } 1761 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 1762 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1763 } 1764 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1765 1766 if (M == PETSC_DECIDE) { 1767 work[0] = m; mbs = m/bs; 1768 ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1769 M = sum[0]; Mbs = M/bs; 1770 } else { /* M is specified */ 1771 Mbs = M/bs; 1772 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1773 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1774 m = mbs*bs; 1775 } 1776 1777 if (mbs*bs != m) { 1778 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize"); 1779 } 1780 1781 b->m = m; B->m = m; 1782 b->n = m; B->n = m; 1783 b->N = M; B->N = M; 1784 b->M = M; 1785 B->M = M; 1786 b->bs = bs; 1787 b->bs2 = bs*bs; 1788 b->mbs = mbs; 1789 b->nbs = mbs; 1790 b->Mbs = Mbs; 1791 b->Nbs = Mbs; 1792 1793 /* the information in the maps duplicates the information computed below, eventually 1794 we should remove the duplicate information that is not contained in the maps */ 1795 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 1796 ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 1797 1798 /* build local table of row and column ownerships */ 1799 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1800 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1801 b->cowners = b->rowners + b->size + 2; 1802 b->rowners_bs = b->cowners + b->size + 2; 1803 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1804 b->rowners[0] = 0; 1805 for (i=2; i<=b->size; i++) { 1806 b->rowners[i] += b->rowners[i-1]; 1807 } 1808 for (i=0; i<=b->size; i++) { 1809 b->rowners_bs[i] = b->rowners[i]*bs; 1810 } 1811 b->rstart = b->rowners[b->rank]; 1812 b->rend = b->rowners[b->rank+1]; 1813 b->rstart_bs = b->rstart * bs; 1814 b->rend_bs = b->rend * bs; 1815 1816 b->cstart = b->rstart; 1817 b->cend = b->rend; 1818 b->cstart_bs = b->cstart * bs; 1819 b->cend_bs = b->cend * bs; 1820 1821 1822 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1823 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1824 PLogObjectParent(B,b->A); 1825 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1826 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1827 PLogObjectParent(B,b->B); 1828 1829 /* build cache for off array entries formed */ 1830 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1831 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1832 b->donotstash = PETSC_FALSE; 1833 b->colmap = PETSC_NULL; 1834 b->garray = PETSC_NULL; 1835 b->roworiented = PETSC_TRUE; 1836 1837 #if defined(PEYSC_USE_MAT_SINGLE) 1838 /* stuff for MatSetValues_XXX in single precision */ 1839 b->lensetvalues = 0; 1840 b->setvaluescopy = PETSC_NULL; 1841 #endif 1842 1843 /* stuff used in block assembly */ 1844 b->barray = 0; 1845 1846 /* stuff used for matrix vector multiply */ 1847 b->lvec = 0; 1848 b->Mvctx = 0; 1849 1850 /* stuff for MatGetRow() */ 1851 b->rowindices = 0; 1852 b->rowvalues = 0; 1853 b->getrowactive = PETSC_FALSE; 1854 1855 /* hash table stuff */ 1856 b->ht = 0; 1857 b->hd = 0; 1858 b->ht_size = 0; 1859 b->ht_flag = PETSC_FALSE; 1860 b->ht_fact = 0; 1861 b->ht_total_ct = 0; 1862 b->ht_insert_ct = 0; 1863 1864 *A = B; 1865 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1866 if (flg) { 1867 double fact = 1.39; 1868 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1869 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1870 if (fact <= 1.0) fact = 1.39; 1871 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1872 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1873 } 1874 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1875 "MatStoreValues_MPISBAIJ", 1876 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1877 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1878 "MatRetrieveValues_MPISBAIJ", 1879 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1880 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1881 "MatGetDiagonalBlock_MPISBAIJ", 1882 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 1887 #undef __FUNC__ 1888 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 1889 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1890 { 1891 Mat mat; 1892 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1893 int ierr,len=0; 1894 PetscTruth flg; 1895 1896 PetscFunctionBegin; 1897 *newmat = 0; 1898 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 1899 PLogObjectCreate(mat); 1900 mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 1901 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1902 mat->ops->destroy = MatDestroy_MPISBAIJ; 1903 mat->ops->view = MatView_MPISBAIJ; 1904 mat->factor = matin->factor; 1905 mat->assembled = PETSC_TRUE; 1906 mat->insertmode = NOT_SET_VALUES; 1907 1908 a->m = mat->m = oldmat->m; 1909 a->n = mat->n = oldmat->n; 1910 a->M = mat->M = oldmat->M; 1911 a->N = mat->N = oldmat->N; 1912 1913 a->bs = oldmat->bs; 1914 a->bs2 = oldmat->bs2; 1915 a->mbs = oldmat->mbs; 1916 a->nbs = oldmat->nbs; 1917 a->Mbs = oldmat->Mbs; 1918 a->Nbs = oldmat->Nbs; 1919 1920 a->rstart = oldmat->rstart; 1921 a->rend = oldmat->rend; 1922 a->cstart = oldmat->cstart; 1923 a->cend = oldmat->cend; 1924 a->size = oldmat->size; 1925 a->rank = oldmat->rank; 1926 a->donotstash = oldmat->donotstash; 1927 a->roworiented = oldmat->roworiented; 1928 a->rowindices = 0; 1929 a->rowvalues = 0; 1930 a->getrowactive = PETSC_FALSE; 1931 a->barray = 0; 1932 a->rstart_bs = oldmat->rstart_bs; 1933 a->rend_bs = oldmat->rend_bs; 1934 a->cstart_bs = oldmat->cstart_bs; 1935 a->cend_bs = oldmat->cend_bs; 1936 1937 /* hash table stuff */ 1938 a->ht = 0; 1939 a->hd = 0; 1940 a->ht_size = 0; 1941 a->ht_flag = oldmat->ht_flag; 1942 a->ht_fact = oldmat->ht_fact; 1943 a->ht_total_ct = 0; 1944 a->ht_insert_ct = 0; 1945 1946 1947 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1948 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1949 a->cowners = a->rowners + a->size + 2; 1950 a->rowners_bs = a->cowners + a->size + 2; 1951 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1952 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1953 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1954 if (oldmat->colmap) { 1955 #if defined (PETSC_USE_CTABLE) 1956 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1957 #else 1958 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1959 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1960 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 1961 #endif 1962 } else a->colmap = 0; 1963 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1964 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 1965 PLogObjectMemory(mat,len*sizeof(int)); 1966 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 1967 } else a->garray = 0; 1968 1969 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1970 PLogObjectParent(mat,a->lvec); 1971 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1972 1973 PLogObjectParent(mat,a->Mvctx); 1974 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1975 PLogObjectParent(mat,a->A); 1976 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1977 PLogObjectParent(mat,a->B); 1978 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1979 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 1980 if (flg) { 1981 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1982 } 1983 *newmat = mat; 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #include "petscsys.h" 1988 1989 #undef __FUNC__ 1990 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 1991 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 1992 { 1993 Mat A; 1994 int i,nz,ierr,j,rstart,rend,fd; 1995 Scalar *vals,*buf; 1996 MPI_Comm comm = ((PetscObject)viewer)->comm; 1997 MPI_Status status; 1998 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1999 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2000 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2001 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2002 int dcount,kmax,k,nzcount,tmp; 2003 2004 PetscFunctionBegin; 2005 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2006 2007 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2008 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2009 if (!rank) { 2010 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2011 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2012 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2013 if (header[3] < 0) { 2014 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ"); 2015 } 2016 } 2017 2018 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2019 M = header[1]; N = header[2]; 2020 2021 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2022 2023 /* 2024 This code adds extra rows to make sure the number of rows is 2025 divisible by the blocksize 2026 */ 2027 Mbs = M/bs; 2028 extra_rows = bs - M + bs*(Mbs); 2029 if (extra_rows == bs) extra_rows = 0; 2030 else Mbs++; 2031 if (extra_rows &&!rank) { 2032 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2033 } 2034 2035 /* determine ownership of all rows */ 2036 mbs = Mbs/size + ((Mbs % size) > rank); 2037 m = mbs * bs; 2038 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2039 browners = rowners + size + 1; 2040 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2041 rowners[0] = 0; 2042 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2043 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2044 rstart = rowners[rank]; 2045 rend = rowners[rank+1]; 2046 2047 /* distribute row lengths to all processors */ 2048 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2049 if (!rank) { 2050 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2051 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2052 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2053 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2054 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2055 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2056 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2057 } else { 2058 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2059 } 2060 2061 if (!rank) { /* procs[0] */ 2062 /* calculate the number of nonzeros on each processor */ 2063 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2064 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2065 for (i=0; i<size; i++) { 2066 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2067 procsnz[i] += rowlengths[j]; 2068 } 2069 } 2070 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2071 2072 /* determine max buffer needed and allocate it */ 2073 maxnz = 0; 2074 for (i=0; i<size; i++) { 2075 maxnz = PetscMax(maxnz,procsnz[i]); 2076 } 2077 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2078 2079 /* read in my part of the matrix column indices */ 2080 nz = procsnz[0]; 2081 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2082 mycols = ibuf; 2083 if (size == 1) nz -= extra_rows; 2084 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2085 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2086 2087 /* read in every ones (except the last) and ship off */ 2088 for (i=1; i<size-1; i++) { 2089 nz = procsnz[i]; 2090 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2091 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2092 } 2093 /* read in the stuff for the last proc */ 2094 if (size != 1) { 2095 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2096 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2097 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2098 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2099 } 2100 ierr = PetscFree(cols);CHKERRQ(ierr); 2101 } else { /* procs[i], i>0 */ 2102 /* determine buffer space needed for message */ 2103 nz = 0; 2104 for (i=0; i<m; i++) { 2105 nz += locrowlens[i]; 2106 } 2107 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2108 mycols = ibuf; 2109 /* receive message of column indices*/ 2110 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2111 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2112 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2113 } 2114 2115 /* loop over local rows, determining number of off diagonal entries */ 2116 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2117 odlens = dlens + (rend-rstart); 2118 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2119 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2120 masked1 = mask + Mbs; 2121 masked2 = masked1 + Mbs; 2122 rowcount = 0; nzcount = 0; 2123 for (i=0; i<mbs; i++) { 2124 dcount = 0; 2125 odcount = 0; 2126 for (j=0; j<bs; j++) { 2127 kmax = locrowlens[rowcount]; 2128 for (k=0; k<kmax; k++) { 2129 tmp = mycols[nzcount++]/bs; /* block col. index */ 2130 if (!mask[tmp]) { 2131 mask[tmp] = 1; 2132 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2133 else masked1[dcount++] = tmp; /* entry in diag portion */ 2134 } 2135 } 2136 rowcount++; 2137 } 2138 2139 dlens[i] = dcount; /* d_nzz[i] */ 2140 odlens[i] = odcount; /* o_nzz[i] */ 2141 2142 /* zero out the mask elements we set */ 2143 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2144 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2145 } 2146 2147 /* create our matrix */ 2148 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2149 CHKERRQ(ierr); 2150 A = *newmat; 2151 MatSetOption(A,MAT_COLUMNS_SORTED); 2152 2153 if (!rank) { 2154 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2155 /* read in my part of the matrix numerical values */ 2156 nz = procsnz[0]; 2157 vals = buf; 2158 mycols = ibuf; 2159 if (size == 1) nz -= extra_rows; 2160 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2161 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2162 2163 /* insert into matrix */ 2164 jj = rstart*bs; 2165 for (i=0; i<m; i++) { 2166 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2167 mycols += locrowlens[i]; 2168 vals += locrowlens[i]; 2169 jj++; 2170 } 2171 2172 /* read in other processors (except the last one) and ship out */ 2173 for (i=1; i<size-1; i++) { 2174 nz = procsnz[i]; 2175 vals = buf; 2176 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2177 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2178 } 2179 /* the last proc */ 2180 if (size != 1){ 2181 nz = procsnz[i] - extra_rows; 2182 vals = buf; 2183 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2184 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2185 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2186 } 2187 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2188 2189 } else { 2190 /* receive numeric values */ 2191 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2192 2193 /* receive message of values*/ 2194 vals = buf; 2195 mycols = ibuf; 2196 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2197 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2198 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2199 2200 /* insert into matrix */ 2201 jj = rstart*bs; 2202 for (i=0; i<m; i++) { 2203 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2204 mycols += locrowlens[i]; 2205 vals += locrowlens[i]; 2206 jj++; 2207 } 2208 } 2209 2210 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2211 ierr = PetscFree(buf);CHKERRQ(ierr); 2212 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2213 ierr = PetscFree(rowners);CHKERRQ(ierr); 2214 ierr = PetscFree(dlens);CHKERRQ(ierr); 2215 ierr = PetscFree(mask);CHKERRQ(ierr); 2216 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2217 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNC__ 2222 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2223 /*@ 2224 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2225 2226 Input Parameters: 2227 . mat - the matrix 2228 . fact - factor 2229 2230 Collective on Mat 2231 2232 Level: advanced 2233 2234 Notes: 2235 This can also be set by the command line option: -mat_use_hash_table fact 2236 2237 .keywords: matrix, hashtable, factor, HT 2238 2239 .seealso: MatSetOption() 2240 @*/ 2241 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2242 { 2243 Mat_MPIBAIJ *baij; 2244 2245 PetscFunctionBegin; 2246 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 2247 PetscFunctionReturn(0); 2248 } 2249