#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.165 1999/03/19 21:19:45 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

/* Routines defined in other compilation units of the BAIJ implementation */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   MatStoreValues_MPIBAIJ - Saves a copy of the current numerical values of the
   parallel matrix by delegating MatStoreValues() to both the diagonal (A) and
   off-diagonal (B) local sequential blocks.
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   MatRetrieveValues_MPIBAIJ - Restores the numerical values previously saved
   with MatStoreValues_MPIBAIJ(), again on both local blocks.
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non scalable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* Sparse variant: table keyed by global block column + 1 (so that 0 can
     mean "absent"); baij->nbs/5 is only an initial size hint for the table */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one entry per global block column (length baij->Nbs);
     entry is local offset + 1, with 0 meaning "column not present locally" */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* Number of extra block entries to grow a row by when it runs out of space */
#define CHUNKSIZE  10

/*
   Inlined insertion of one scalar (row,col,value) into the diagonal block a.
   NOTE: this macro reads and writes many variables from the enclosing scope
   of MatSetValues_MPIBAIJ (rp,ap,aa,ai,aj,aimax,ailen,bs,bs2,...); it may
   reallocate the row storage of a when the row is full.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                  aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
      /* free up old matrix storage */ \
      PetscFree(a->a);  \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but acting on the
   off-diagonal block b (arrays ba,bi,bj,bimax,bilen); also relies on the
   enclosing scope of MatSetValues_MPIBAIJ.
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                  ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
      /* free up old matrix storage */ \
      PetscFree(b->a);  \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts/adds scalar-indexed values into the parallel
   matrix.  Locally owned rows go directly into the diagonal (A) or
   off-diagonal (B) block via the inlined macros above; rows owned by other
   processors are stashed for communication during assembly.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative row indices are ignored by convention */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = 
MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 273 } else if (in[j] < 0) continue; 274 #if defined(USE_PETSC_BOPT_g) 275 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 276 #endif 277 else { 278 if (mat->was_assembled) { 279 if (!baij->colmap) { 280 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 281 } 282 #if defined (USE_CTABLE) 283 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr); 284 col = col - 1 + in[j]%bs; 285 #else 286 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 287 #endif 288 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 289 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 290 col = in[j]; 291 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 292 B = baij->B; 293 b = (Mat_SeqBAIJ *) (B)->data; 294 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 295 ba=b->a; 296 } 297 } else col = in[j]; 298 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 299 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 300 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 301 } 302 } 303 } else { 304 if ( !baij->donotstash) { 305 if (roworiented) { 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 307 } else { 308 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 309 } 310 } 311 } 312 } 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNC__ 317 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 318 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 319 { 320 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 321 Scalar *value,*barray=baij->barray; 322 int ierr,i,j,ii,jj,row,col; 323 int roworiented = baij->roworiented,rstart=baij->rstart ; 324 int rend=baij->rend,cstart=baij->cstart,stepval; 325 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 326 327 if(!barray) { 328 baij->barray = barray = (Scalar*) 
                           PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  /* distance (in Scalars) from the end of one block row of v to the next */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal block column: translate global to local numbering */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
            /* colmap entries for block columns must be block-aligned */
#if defined (USE_CTABLE)
            { int data;
              ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
              if((data - 1) % bs)
                {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && 
                !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned elsewhere: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Multiplicative hashing constant (golden-ratio fraction) */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ,
   installed after first assembly when -mat_use_hash_table is active.  The
   nonzero structure is frozen; each (block row,block col) is located via the
   hash table built in MatCreateHashTable_MPIBAIJ_Private() and the value is
   written directly into the stored block.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping once around the table */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Blocked hash-table variant: locates each
   bs x bs block through the hash table and copies/accumulates the whole block.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval
            = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probe forward from h1, wrapping once around the table */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves values at scalar indices.  Only rows owned
   by this processor are supported; off-diagonal columns not present locally
   return 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap may be stale after disassembly; verify via garray */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes a matrix norm; only NORM_FROBENIUS is supported
   in parallel (local sums of squares reduced with MPI_SUM, then sqrt).
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table 
  This table is created only once. 
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated. 
701 */ 702 #undef __FUNC__ 703 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 704 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 705 { 706 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 707 Mat A = baij->A, B=baij->B; 708 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 709 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 710 int size,bs2=baij->bs2,rstart=baij->rstart; 711 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 712 int *HT,key; 713 Scalar **HD; 714 double tmp; 715 #if defined(USE_PETSC_BOPT_g) 716 int ct=0,max=0; 717 #endif 718 719 PetscFunctionBegin; 720 baij->ht_size=(int)(factor*nz); 721 size = baij->ht_size; 722 723 if (baij->ht) { 724 PetscFunctionReturn(0); 725 } 726 727 /* Allocate Memory for Hash Table */ 728 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 729 baij->ht = (int*)(baij->hd + size); 730 HD = baij->hd; 731 HT = baij->ht; 732 733 734 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 735 736 737 /* Loop Over A */ 738 for ( i=0; i<a->mbs; i++ ) { 739 for ( j=ai[i]; j<ai[i+1]; j++ ) { 740 row = i+rstart; 741 col = aj[j]+cstart; 742 743 key = row*Nbs + col + 1; 744 h1 = HASH(size,key,tmp); 745 for ( k=0; k<size; k++ ){ 746 if (HT[(h1+k)%size] == 0.0) { 747 HT[(h1+k)%size] = key; 748 HD[(h1+k)%size] = a->a + j*bs2; 749 break; 750 #if defined(USE_PETSC_BOPT_g) 751 } else { 752 ct++; 753 #endif 754 } 755 } 756 #if defined(USE_PETSC_BOPT_g) 757 if (k> max) max = k; 758 #endif 759 } 760 } 761 /* Loop Over B */ 762 for ( i=0; i<b->mbs; i++ ) { 763 for ( j=bi[i]; j<bi[i+1]; j++ ) { 764 row = i+rstart; 765 col = garray[bj[j]]; 766 key = row*Nbs + col + 1; 767 h1 = HASH(size,key,tmp); 768 for ( k=0; k<size; k++ ){ 769 if (HT[(h1+k)%size] == 0.0) { 770 HT[(h1+k)%size] = key; 771 HD[(h1+k)%size] = b->a + j*bs2; 772 break; 773 #if defined(USE_PETSC_BOPT_g) 774 } else { 775 ct++; 776 #endif 777 } 778 } 779 #if 
defined(USE_PETSC_BOPT_g) 780 if (k> max) max = k; 781 #endif 782 } 783 } 784 785 /* Print Summary */ 786 #if defined(USE_PETSC_BOPT_g) 787 for ( i=0,j=0; i<size; i++) 788 if (HT[i]) {j++;} 789 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 790 (j== 0)? 0.0:((double)(ct+j))/j,max); 791 #endif 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNC__ 796 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 797 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 800 int ierr,nstash,reallocs; 801 InsertMode addv; 802 803 PetscFunctionBegin; 804 if (baij->donotstash) { 805 PetscFunctionReturn(0); 806 } 807 808 /* make sure all processors are either in INSERTMODE or ADDMODE */ 809 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 810 if (addv == (ADD_VALUES|INSERT_VALUES)) { 811 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 812 } 813 mat->insertmode = addv; /* in case this processor had no cache */ 814 815 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs); CHKERRQ(ierr); 816 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners); CHKERRQ(ierr); 817 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 818 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 819 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr); 820 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNC__ 825 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 826 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 827 { 828 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 829 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 830 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 831 int 
              *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the scalar stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues() 
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselves, in order that we may reassemble. 
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that 
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* set up the scatter for matrix-vector products (first assembly only) */
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the nonzero pattern is final, switch MatSetValues to the
     hash-table fast path if the user requested it */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
/*
   MatView_MPIBAIJ_Binary - Binary output; only the uniprocessor case is
   supported (delegates to the sequential A block).
*/
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views the matrix via ASCII (info or
   full), draw, or socket viewers; for the multiprocessor full view the whole
   matrix is assembled onto processor 0 first.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,
format,bs = baij->bs, size = baij->size, rank = baij->rank; 940 FILE *fd; 941 ViewerType vtype; 942 943 PetscFunctionBegin; 944 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 945 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 946 ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr); 947 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 948 MatInfo info; 949 MPI_Comm_rank(mat->comm,&rank); 950 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 951 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 952 PetscSequentialPhaseBegin(mat->comm,1); 953 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 954 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 955 baij->bs,(int)info.memory); 956 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 957 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 958 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 959 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 960 fflush(fd); 961 PetscSequentialPhaseEnd(mat->comm,1); 962 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 965 PetscPrintf(mat->comm," block size is %d\n",bs); 966 PetscFunctionReturn(0); 967 } 968 } 969 970 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 971 Draw draw; 972 PetscTruth isnull; 973 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 974 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 975 } 976 977 if (size == 1) { 978 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 979 } else { 980 /* assemble the entire matrix onto first processor. 
*/ 981 Mat A; 982 Mat_SeqBAIJ *Aloc; 983 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 984 int mbs = baij->mbs; 985 Scalar *a; 986 987 if (!rank) { 988 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 989 } else { 990 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 991 } 992 PLogObjectParent(mat,A); 993 994 /* copy over the A part */ 995 Aloc = (Mat_SeqBAIJ*) baij->A->data; 996 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 997 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 998 999 for ( i=0; i<mbs; i++ ) { 1000 rvals[0] = bs*(baij->rstart + i); 1001 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1002 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1003 col = (baij->cstart+aj[j])*bs; 1004 for (k=0; k<bs; k++ ) { 1005 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1006 col++; a += bs; 1007 } 1008 } 1009 } 1010 /* copy over the B part */ 1011 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1012 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1013 for ( i=0; i<mbs; i++ ) { 1014 rvals[0] = bs*(baij->rstart + i); 1015 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1016 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1017 col = baij->garray[aj[j]]*bs; 1018 for (k=0; k<bs; k++ ) { 1019 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1020 col++; a += bs; 1021 } 1022 } 1023 } 1024 PetscFree(rvals); 1025 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1026 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1027 /* 1028 Everyone has to call to draw the matrix since the graphics waits are 1029 synchronized across all processors that share the Draw object 1030 */ 1031 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1032 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1033 } 1034 ierr = MatDestroy(A); CHKERRQ(ierr); 1035 } 1036 PetscFunctionReturn(0); 1037 } 1038 1039 1040 1041 #undef __FUNC__ 1042 #define 
__FUNC__ "MatView_MPIBAIJ" 1043 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1044 { 1045 int ierr; 1046 ViewerType vtype; 1047 1048 PetscFunctionBegin; 1049 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1050 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1051 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1052 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 1053 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1054 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1055 } else { 1056 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNC__ 1062 #define __FUNC__ "MatDestroy_MPIBAIJ" 1063 int MatDestroy_MPIBAIJ(Mat mat) 1064 { 1065 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1066 int ierr; 1067 1068 PetscFunctionBegin; 1069 if (--mat->refct > 0) PetscFunctionReturn(0); 1070 1071 if (mat->mapping) { 1072 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1073 } 1074 if (mat->bmapping) { 1075 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1076 } 1077 if (mat->rmap) { 1078 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1079 } 1080 if (mat->cmap) { 1081 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1082 } 1083 #if defined(USE_PETSC_LOG) 1084 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1085 #endif 1086 1087 ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr); 1088 ierr = MatStashDestroy_Private(&mat->bstash); CHKERRQ(ierr); 1089 1090 PetscFree(baij->rowners); 1091 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1092 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1093 #if defined (USE_CTABLE) 1094 if (baij->colmap) TableDelete(baij->colmap); 1095 #else 1096 if (baij->colmap) PetscFree(baij->colmap); 1097 #endif 1098 if (baij->garray) PetscFree(baij->garray); 1099 if (baij->lvec) VecDestroy(baij->lvec); 1100 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1101 if (baij->rowvalues) 
PetscFree(baij->rowvalues); 1102 if (baij->barray) PetscFree(baij->barray); 1103 if (baij->hd) PetscFree(baij->hd); 1104 PetscFree(baij); 1105 PLogObjectDestroy(mat); 1106 PetscHeaderDestroy(mat); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNC__ 1111 #define __FUNC__ "MatMult_MPIBAIJ" 1112 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1113 { 1114 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1115 int ierr, nt; 1116 1117 PetscFunctionBegin; 1118 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1119 if (nt != a->n) { 1120 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1121 } 1122 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1123 if (nt != a->m) { 1124 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1125 } 1126 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1127 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1128 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1129 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1130 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNC__ 1135 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1136 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1137 { 1138 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1143 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1144 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1145 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNC__ 1150 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1151 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1152 { 1153 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1154 int ierr; 1155 1156 
PetscFunctionBegin; 1157 /* do nondiagonal part */ 1158 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1159 /* send it on its way */ 1160 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1161 /* do local part */ 1162 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1163 /* receive remote parts: note this assumes the values are not actually */ 1164 /* inserted in yy until the next line, which is true for my implementation*/ 1165 /* but is not perhaps always true. */ 1166 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNC__ 1171 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1172 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1173 { 1174 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1175 int ierr; 1176 1177 PetscFunctionBegin; 1178 /* do nondiagonal part */ 1179 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1180 /* send it on its way */ 1181 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1182 /* do local part */ 1183 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1184 /* receive remote parts: note this assumes the values are not actually */ 1185 /* inserted in yy until the next line, which is true for my implementation*/ 1186 /* but is not perhaps always true. 
*/ 1187 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 /* 1192 This only works correctly for square matrices where the subblock A->A is the 1193 diagonal block 1194 */ 1195 #undef __FUNC__ 1196 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1197 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1198 { 1199 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1200 int ierr; 1201 1202 PetscFunctionBegin; 1203 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1204 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNC__ 1209 #define __FUNC__ "MatScale_MPIBAIJ" 1210 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1211 { 1212 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1213 int ierr; 1214 1215 PetscFunctionBegin; 1216 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1217 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNC__ 1222 #define __FUNC__ "MatGetSize_MPIBAIJ" 1223 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1224 { 1225 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1226 1227 PetscFunctionBegin; 1228 if (m) *m = mat->M; 1229 if (n) *n = mat->N; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNC__ 1234 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1235 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1236 { 1237 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1238 1239 PetscFunctionBegin; 1240 *m = mat->m; *n = mat->n; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1246 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1247 { 1248 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1249 1250 PetscFunctionBegin; 1251 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNC__ 1256 #define __FUNC__ "MatGetRow_MPIBAIJ" 1257 int 
MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1258 { 1259 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1260 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1261 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1262 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1263 int *cmap, *idx_p,cstart = mat->cstart; 1264 1265 PetscFunctionBegin; 1266 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1267 mat->getrowactive = PETSC_TRUE; 1268 1269 if (!mat->rowvalues && (idx || v)) { 1270 /* 1271 allocate enough space to hold information from the longest row. 1272 */ 1273 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1274 int max = 1,mbs = mat->mbs,tmp; 1275 for ( i=0; i<mbs; i++ ) { 1276 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1277 if (max < tmp) { max = tmp; } 1278 } 1279 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1280 CHKPTRQ(mat->rowvalues); 1281 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1282 } 1283 1284 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1285 lrow = row - brstart; 1286 1287 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1288 if (!v) {pvA = 0; pvB = 0;} 1289 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1290 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1291 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1292 nztot = nzA + nzB; 1293 1294 cmap = mat->garray; 1295 if (v || idx) { 1296 if (nztot) { 1297 /* Sort by increasing column numbers, assuming A and B already sorted */ 1298 int imark = -1; 1299 if (v) { 1300 *v = v_p = mat->rowvalues; 1301 for ( i=0; i<nzB; i++ ) { 1302 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1303 else break; 1304 } 1305 imark = i; 1306 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1307 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = 
vworkB[i]; 1308 } 1309 if (idx) { 1310 *idx = idx_p = mat->rowindices; 1311 if (imark > -1) { 1312 for ( i=0; i<imark; i++ ) { 1313 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1314 } 1315 } else { 1316 for ( i=0; i<nzB; i++ ) { 1317 if (cmap[cworkB[i]/bs] < cstart) 1318 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1319 else break; 1320 } 1321 imark = i; 1322 } 1323 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1324 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1325 } 1326 } else { 1327 if (idx) *idx = 0; 1328 if (v) *v = 0; 1329 } 1330 } 1331 *nz = nztot; 1332 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1333 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNC__ 1338 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1339 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1340 { 1341 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1342 1343 PetscFunctionBegin; 1344 if (baij->getrowactive == PETSC_FALSE) { 1345 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1346 } 1347 baij->getrowactive = PETSC_FALSE; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNC__ 1352 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1353 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1354 { 1355 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1356 1357 PetscFunctionBegin; 1358 *bs = baij->bs; 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNC__ 1363 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1364 int MatZeroEntries_MPIBAIJ(Mat A) 1365 { 1366 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1367 int ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1371 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNC__ 1376 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1377 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType 
flag,MatInfo *info) 1378 { 1379 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1380 Mat A = a->A, B = a->B; 1381 int ierr; 1382 double isend[5], irecv[5]; 1383 1384 PetscFunctionBegin; 1385 info->block_size = (double)a->bs; 1386 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1387 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1388 isend[3] = info->memory; isend[4] = info->mallocs; 1389 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1390 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1391 isend[3] += info->memory; isend[4] += info->mallocs; 1392 if (flag == MAT_LOCAL) { 1393 info->nz_used = isend[0]; 1394 info->nz_allocated = isend[1]; 1395 info->nz_unneeded = isend[2]; 1396 info->memory = isend[3]; 1397 info->mallocs = isend[4]; 1398 } else if (flag == MAT_GLOBAL_MAX) { 1399 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1400 info->nz_used = irecv[0]; 1401 info->nz_allocated = irecv[1]; 1402 info->nz_unneeded = irecv[2]; 1403 info->memory = irecv[3]; 1404 info->mallocs = irecv[4]; 1405 } else if (flag == MAT_GLOBAL_SUM) { 1406 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1407 info->nz_used = irecv[0]; 1408 info->nz_allocated = irecv[1]; 1409 info->nz_unneeded = irecv[2]; 1410 info->memory = irecv[3]; 1411 info->mallocs = irecv[4]; 1412 } else { 1413 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1414 } 1415 info->rows_global = (double)a->M; 1416 info->columns_global = (double)a->N; 1417 info->rows_local = (double)a->m; 1418 info->columns_local = (double)a->N; 1419 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1420 info->fill_ratio_needed = 0; 1421 info->factor_mallocs = 0; 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNC__ 1426 #define __FUNC__ "MatSetOption_MPIBAIJ" 1427 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1428 { 1429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 
1430 int ierr; 1431 1432 PetscFunctionBegin; 1433 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1434 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1435 op == MAT_COLUMNS_UNSORTED || 1436 op == MAT_COLUMNS_SORTED || 1437 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1438 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1439 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1441 } else if (op == MAT_ROW_ORIENTED) { 1442 a->roworiented = 1; 1443 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1444 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1445 } else if (op == MAT_ROWS_SORTED || 1446 op == MAT_ROWS_UNSORTED || 1447 op == MAT_SYMMETRIC || 1448 op == MAT_STRUCTURALLY_SYMMETRIC || 1449 op == MAT_YES_NEW_DIAGONALS || 1450 op == MAT_USE_HASH_TABLE) { 1451 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1452 } else if (op == MAT_COLUMN_ORIENTED) { 1453 a->roworiented = 0; 1454 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1455 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1456 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1457 a->donotstash = 1; 1458 } else if (op == MAT_NO_NEW_DIAGONALS) { 1459 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1460 } else if (op == MAT_USE_HASH_TABLE) { 1461 a->ht_flag = 1; 1462 } else { 1463 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNC__ 1469 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1470 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1471 { 1472 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1473 Mat_SeqBAIJ *Aloc; 1474 Mat B; 1475 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1476 int bs=baij->bs,mbs=baij->mbs; 1477 Scalar *a; 1478 1479 PetscFunctionBegin; 1480 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1481 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1482 CHKERRQ(ierr); 1483 1484 /* copy over the A part */ 1485 Aloc = (Mat_SeqBAIJ*) 
baij->A->data; 1486 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1487 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1488 1489 for ( i=0; i<mbs; i++ ) { 1490 rvals[0] = bs*(baij->rstart + i); 1491 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1492 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1493 col = (baij->cstart+aj[j])*bs; 1494 for (k=0; k<bs; k++ ) { 1495 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1496 col++; a += bs; 1497 } 1498 } 1499 } 1500 /* copy over the B part */ 1501 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1502 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1503 for ( i=0; i<mbs; i++ ) { 1504 rvals[0] = bs*(baij->rstart + i); 1505 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1506 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1507 col = baij->garray[aj[j]]*bs; 1508 for (k=0; k<bs; k++ ) { 1509 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1510 col++; a += bs; 1511 } 1512 } 1513 } 1514 PetscFree(rvals); 1515 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1516 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1517 1518 if (matout != PETSC_NULL) { 1519 *matout = B; 1520 } else { 1521 PetscOps *Abops; 1522 MatOps Aops; 1523 1524 /* This isn't really an in-place transpose .... but free data structures from baij */ 1525 PetscFree(baij->rowners); 1526 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1527 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1528 #if defined (USE_CTABLE) 1529 if (baij->colmap) TableDelete(baij->colmap); 1530 #else 1531 if (baij->colmap) PetscFree(baij->colmap); 1532 #endif 1533 if (baij->garray) PetscFree(baij->garray); 1534 if (baij->lvec) VecDestroy(baij->lvec); 1535 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1536 PetscFree(baij); 1537 1538 /* 1539 This is horrible, horrible code. We need to keep the 1540 A pointers for the bops and ops but copy everything 1541 else from C. 
1542 */ 1543 Abops = A->bops; 1544 Aops = A->ops; 1545 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1546 A->bops = Abops; 1547 A->ops = Aops; 1548 1549 PetscHeaderDestroy(B); 1550 } 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNC__ 1555 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1556 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1557 { 1558 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1559 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1560 int ierr,s1,s2,s3; 1561 1562 PetscFunctionBegin; 1563 if (ll) { 1564 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1565 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1566 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1567 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1568 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1569 } 1570 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNC__ 1575 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1576 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1577 { 1578 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1579 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1580 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1581 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1582 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1583 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1584 MPI_Comm comm = A->comm; 1585 MPI_Request *send_waits,*recv_waits; 1586 MPI_Status recv_status,*send_status; 1587 IS istmp; 1588 1589 PetscFunctionBegin; 1590 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1591 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1592 1593 /* first count number of contributors to each processor */ 1594 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1595 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1596 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1597 for ( i=0; i<N; i++ ) { 1598 idx = rows[i]; 1599 
    found = 0;
    /* linear search over the block-row ownership ranges (owners[] is in
       blocks, idx in scalar rows, hence the *bs) */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  /* SUM of the 0/1 flags gives each rank its number of incoming messages */
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  /* MAX of the per-destination counts bounds the longest incoming message */
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives: each is nmax long; senders are not known in advance */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination; starts[] is consumed here ... */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* ... and therefore rebuilt before issuing the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  /* first scalar row owned by this processor (global numbering) */
  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    /* NOTE(review): source[] is recorded here but not read again in the
       visible code -- possibly kept for debugging */
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter: convert to local row numbers */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the sequential MatZeroRows sets the diagonal itself */
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* insert the diagonal entries one at a time; requires reassembly */
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/*
   Prints the runtime options for this format; the body runs at most once
   per executable (static guard), and only rank 0 prints the SeqBAIJ help.
*/
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
  (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* Clears the factored state of the diagonal block only */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   Two parallel BAIJ matrices are equal iff, on every processor, both the
   diagonal (A) and off-diagonal (B) blocks compare equal; the local results
   are combined with a logical-AND reduction.
*/
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIBAIJ type; entry ORDER is fixed by struct
   _MatOps -- zeros mark operations this format does not implement */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns the diagonal block directly (no copy); reuse is not consulted */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.
d_nz - number of block nonzeros per block row in diagonal portion of local 1883 submatrix (same for all local rows) 1884 . d_nnz - array containing the number of block nonzeros in the various block rows 1885 of the in diagonal portion of the local (possibly different for each block 1886 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1887 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1888 submatrix (same for all local rows). 1889 - o_nnz - array containing the number of nonzeros in the various block rows of the 1890 off-diagonal portion of the local submatrix (possibly different for 1891 each block row) or PETSC_NULL. 1892 1893 Output Parameter: 1894 . A - the matrix 1895 1896 Options Database Keys: 1897 . -mat_no_unroll - uses code that does not unroll the loops in the 1898 block calculations (much slower) 1899 . -mat_block_size - size of the blocks to use 1900 . -mat_mpi - use the parallel matrix data structures even on one processor 1901 (defaults to using SeqBAIJ format on one processor) 1902 1903 Notes: 1904 The user MUST specify either the local or global matrix dimensions 1905 (possibly both). 1906 1907 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1908 than it must be used on all processors that share the object for that argument. 1909 1910 Storage Information: 1911 For a square global matrix we define each processor's diagonal portion 1912 to be its local rows and the corresponding columns (a square submatrix); 1913 each processor's off-diagonal portion encompasses the remainder of the 1914 local matrix (a rectangular submatrix). 1915 1916 The user can specify preallocated storage for the diagonal part of 1917 the local submatrix with either d_nz or d_nnz (not both). Set 1918 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1919 memory allocation. 
Likewise, specify preallocated storage for the 1920 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1921 1922 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1923 the figure below we depict these three local rows and all columns (0-11). 1924 1925 .vb 1926 0 1 2 3 4 5 6 7 8 9 10 11 1927 ------------------- 1928 row 3 | o o o d d d o o o o o o 1929 row 4 | o o o d d d o o o o o o 1930 row 5 | o o o d d d o o o o o o 1931 ------------------- 1932 .ve 1933 1934 Thus, any entries in the d locations are stored in the d (diagonal) 1935 submatrix, and any entries in the o locations are stored in the 1936 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1937 stored simply in the MATSEQBAIJ format for compressed row storage. 1938 1939 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1940 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1941 In general, for PDE problems in which most nonzeros are near the diagonal, 1942 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1943 or you will get TERRIBLE performance; see the users' manual chapter on 1944 matrices. 
1945 1946 Level: intermediate 1947 1948 .keywords: matrix, block, aij, compressed row, sparse, parallel 1949 1950 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1951 @*/ 1952 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1953 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1954 { 1955 Mat B; 1956 Mat_MPIBAIJ *b; 1957 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1958 int flag1 = 0,flag2 = 0; 1959 1960 PetscFunctionBegin; 1961 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1962 1963 MPI_Comm_size(comm,&size); 1964 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1965 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1966 if (!flag1 && !flag2 && size == 1) { 1967 if (M == PETSC_DECIDE) M = m; 1968 if (N == PETSC_DECIDE) N = n; 1969 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 *A = 0; 1974 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1975 PLogObjectCreate(B); 1976 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1977 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1978 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1979 1980 B->ops->destroy = MatDestroy_MPIBAIJ; 1981 B->ops->view = MatView_MPIBAIJ; 1982 B->mapping = 0; 1983 B->factor = 0; 1984 B->assembled = PETSC_FALSE; 1985 1986 B->insertmode = NOT_SET_VALUES; 1987 MPI_Comm_rank(comm,&b->rank); 1988 MPI_Comm_size(comm,&b->size); 1989 1990 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1991 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1992 } 1993 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1994 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1995 } 1996 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1997 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1998 } 1999 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2000 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2001 2002 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2003 work[0] = m; work[1] = n; 2004 mbs = m/bs; nbs = n/bs; 2005 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2006 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2007 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2008 } 2009 if (m == PETSC_DECIDE) { 2010 Mbs = M/bs; 2011 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2012 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2013 m = mbs*bs; 2014 } 2015 if (n == PETSC_DECIDE) { 2016 Nbs = N/bs; 2017 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2018 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2019 n = nbs*bs; 2020 } 2021 if (mbs*bs != m || nbs*bs != n) { 2022 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2023 } 2024 2025 b->m = m; B->m = m; 2026 b->n = n; B->n = n; 2027 b->N = N; B->N = N; 2028 b->M = M; B->M = M; 2029 b->bs = bs; 2030 b->bs2 = bs*bs; 2031 b->mbs = mbs; 2032 b->nbs = nbs; 2033 b->Mbs = Mbs; 2034 b->Nbs = Nbs; 2035 2036 /* the information in the maps duplicates the information computed below, eventually 2037 we should remove the duplicate information that is not contained in the maps */ 2038 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2039 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2040 2041 /* build local table of row and column ownerships */ 2042 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2043 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2044 b->cowners = b->rowners + b->size + 2; 2045 b->rowners_bs = b->cowners + b->size + 2; 2046 ierr = 
MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2047 b->rowners[0] = 0; 2048 for ( i=2; i<=b->size; i++ ) { 2049 b->rowners[i] += b->rowners[i-1]; 2050 } 2051 for ( i=0; i<=b->size; i++ ) { 2052 b->rowners_bs[i] = b->rowners[i]*bs; 2053 } 2054 b->rstart = b->rowners[b->rank]; 2055 b->rend = b->rowners[b->rank+1]; 2056 b->rstart_bs = b->rstart * bs; 2057 b->rend_bs = b->rend * bs; 2058 2059 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2060 b->cowners[0] = 0; 2061 for ( i=2; i<=b->size; i++ ) { 2062 b->cowners[i] += b->cowners[i-1]; 2063 } 2064 b->cstart = b->cowners[b->rank]; 2065 b->cend = b->cowners[b->rank+1]; 2066 b->cstart_bs = b->cstart * bs; 2067 b->cend_bs = b->cend * bs; 2068 2069 2070 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2071 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2072 PLogObjectParent(B,b->A); 2073 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2074 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2075 PLogObjectParent(B,b->B); 2076 2077 /* build cache for off array entries formed */ 2078 ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr); 2079 ierr = MatStashCreate_Private(comm,bs,&B->bstash); CHKERRQ(ierr); 2080 b->donotstash = 0; 2081 b->colmap = 0; 2082 b->garray = 0; 2083 b->roworiented = 1; 2084 2085 /* stuff used in block assembly */ 2086 b->barray = 0; 2087 2088 /* stuff used for matrix vector multiply */ 2089 b->lvec = 0; 2090 b->Mvctx = 0; 2091 2092 /* stuff for MatGetRow() */ 2093 b->rowindices = 0; 2094 b->rowvalues = 0; 2095 b->getrowactive = PETSC_FALSE; 2096 2097 /* hash table stuff */ 2098 b->ht = 0; 2099 b->hd = 0; 2100 b->ht_size = 0; 2101 b->ht_flag = 0; 2102 b->ht_fact = 0; 2103 b->ht_total_ct = 0; 2104 b->ht_insert_ct = 0; 2105 2106 *A = B; 2107 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2108 if (flg) { 2109 double fact = 1.39; 2110 ierr = 
MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2111 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2112 if (fact <= 1.0) fact = 1.39; 2113 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2114 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2115 } 2116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2117 "MatStoreValues_MPIBAIJ", 2118 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2120 "MatRetrieveValues_MPIBAIJ", 2121 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2123 "MatGetDiagonalBlock_MPIBAIJ", 2124 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNC__ 2129 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2130 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2131 { 2132 Mat mat; 2133 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2134 int ierr, len=0, flg; 2135 2136 PetscFunctionBegin; 2137 *newmat = 0; 2138 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2139 PLogObjectCreate(mat); 2140 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2141 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2142 mat->ops->destroy = MatDestroy_MPIBAIJ; 2143 mat->ops->view = MatView_MPIBAIJ; 2144 mat->factor = matin->factor; 2145 mat->assembled = PETSC_TRUE; 2146 mat->insertmode = NOT_SET_VALUES; 2147 2148 a->m = mat->m = oldmat->m; 2149 a->n = mat->n = oldmat->n; 2150 a->M = mat->M = oldmat->M; 2151 a->N = mat->N = oldmat->N; 2152 2153 a->bs = oldmat->bs; 2154 a->bs2 = oldmat->bs2; 2155 a->mbs = oldmat->mbs; 2156 a->nbs = oldmat->nbs; 2157 a->Mbs = oldmat->Mbs; 2158 a->Nbs = oldmat->Nbs; 2159 2160 a->rstart = oldmat->rstart; 2161 a->rend = oldmat->rend; 2162 
a->cstart = oldmat->cstart; 2163 a->cend = oldmat->cend; 2164 a->size = oldmat->size; 2165 a->rank = oldmat->rank; 2166 a->donotstash = oldmat->donotstash; 2167 a->roworiented = oldmat->roworiented; 2168 a->rowindices = 0; 2169 a->rowvalues = 0; 2170 a->getrowactive = PETSC_FALSE; 2171 a->barray = 0; 2172 a->rstart_bs = oldmat->rstart_bs; 2173 a->rend_bs = oldmat->rend_bs; 2174 a->cstart_bs = oldmat->cstart_bs; 2175 a->cend_bs = oldmat->cend_bs; 2176 2177 /* hash table stuff */ 2178 a->ht = 0; 2179 a->hd = 0; 2180 a->ht_size = 0; 2181 a->ht_flag = oldmat->ht_flag; 2182 a->ht_fact = oldmat->ht_fact; 2183 a->ht_total_ct = 0; 2184 a->ht_insert_ct = 0; 2185 2186 2187 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2188 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2189 a->cowners = a->rowners + a->size + 2; 2190 a->rowners_bs = a->cowners + a->size + 2; 2191 PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int)); 2192 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr); 2193 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash); CHKERRQ(ierr); 2194 if (oldmat->colmap) { 2195 #if defined (USE_CTABLE) 2196 ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2197 #else 2198 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2199 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2200 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2201 #endif 2202 } else a->colmap = 0; 2203 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2204 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2205 PLogObjectMemory(mat,len*sizeof(int)); 2206 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2207 } else a->garray = 0; 2208 2209 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2210 PLogObjectParent(mat,a->lvec); 2211 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); 
CHKERRQ(ierr); 2212 2213 PLogObjectParent(mat,a->Mvctx); 2214 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2215 PLogObjectParent(mat,a->A); 2216 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2217 PLogObjectParent(mat,a->B); 2218 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2219 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2220 if (flg) { 2221 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2222 } 2223 *newmat = mat; 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #include "sys.h" 2228 2229 #undef __FUNC__ 2230 #define __FUNC__ "MatLoad_MPIBAIJ" 2231 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2232 { 2233 Mat A; 2234 int i, nz, ierr, j,rstart, rend, fd; 2235 Scalar *vals,*buf; 2236 MPI_Comm comm = ((PetscObject)viewer)->comm; 2237 MPI_Status status; 2238 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2239 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2240 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2241 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2242 int dcount,kmax,k,nzcount,tmp; 2243 2244 PetscFunctionBegin; 2245 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2246 2247 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2248 if (!rank) { 2249 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2250 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2251 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2252 if (header[3] < 0) { 2253 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2254 } 2255 } 2256 2257 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2258 M = header[1]; N = header[2]; 2259 2260 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2261 2262 /* 2263 This code adds extra rows to make sure the number of rows is 2264 
divisible by the blocksize 2265 */ 2266 Mbs = M/bs; 2267 extra_rows = bs - M + bs*(Mbs); 2268 if (extra_rows == bs) extra_rows = 0; 2269 else Mbs++; 2270 if (extra_rows &&!rank) { 2271 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2272 } 2273 2274 /* determine ownership of all rows */ 2275 mbs = Mbs/size + ((Mbs % size) > rank); 2276 m = mbs * bs; 2277 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2278 browners = rowners + size + 1; 2279 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2280 rowners[0] = 0; 2281 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2282 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2283 rstart = rowners[rank]; 2284 rend = rowners[rank+1]; 2285 2286 /* distribute row lengths to all processors */ 2287 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2288 if (!rank) { 2289 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2290 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2291 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2292 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2293 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2294 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2295 PetscFree(sndcounts); 2296 } else { 2297 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2298 } 2299 2300 if (!rank) { 2301 /* calculate the number of nonzeros on each processor */ 2302 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2303 PetscMemzero(procsnz,size*sizeof(int)); 2304 for ( i=0; i<size; i++ ) { 2305 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2306 procsnz[i] += rowlengths[j]; 2307 } 2308 } 2309 PetscFree(rowlengths); 2310 2311 /* determine max buffer needed and allocate it 
*/ 2312 maxnz = 0; 2313 for ( i=0; i<size; i++ ) { 2314 maxnz = PetscMax(maxnz,procsnz[i]); 2315 } 2316 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2317 2318 /* read in my part of the matrix column indices */ 2319 nz = procsnz[0]; 2320 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2321 mycols = ibuf; 2322 if (size == 1) nz -= extra_rows; 2323 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2324 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2325 2326 /* read in every ones (except the last) and ship off */ 2327 for ( i=1; i<size-1; i++ ) { 2328 nz = procsnz[i]; 2329 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2330 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2331 } 2332 /* read in the stuff for the last proc */ 2333 if ( size != 1 ) { 2334 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2335 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2336 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2337 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2338 } 2339 PetscFree(cols); 2340 } else { 2341 /* determine buffer space needed for message */ 2342 nz = 0; 2343 for ( i=0; i<m; i++ ) { 2344 nz += locrowlens[i]; 2345 } 2346 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2347 mycols = ibuf; 2348 /* receive message of column indices*/ 2349 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2350 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2351 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2352 } 2353 2354 /* loop over local rows, determining number of off diagonal entries */ 2355 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2356 odlens = dlens + (rend-rstart); 2357 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2358 PetscMemzero(mask,3*Mbs*sizeof(int)); 2359 masked1 = mask + 
Mbs; 2360 masked2 = masked1 + Mbs; 2361 rowcount = 0; nzcount = 0; 2362 for ( i=0; i<mbs; i++ ) { 2363 dcount = 0; 2364 odcount = 0; 2365 for ( j=0; j<bs; j++ ) { 2366 kmax = locrowlens[rowcount]; 2367 for ( k=0; k<kmax; k++ ) { 2368 tmp = mycols[nzcount++]/bs; 2369 if (!mask[tmp]) { 2370 mask[tmp] = 1; 2371 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2372 else masked1[dcount++] = tmp; 2373 } 2374 } 2375 rowcount++; 2376 } 2377 2378 dlens[i] = dcount; 2379 odlens[i] = odcount; 2380 2381 /* zero out the mask elements we set */ 2382 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2383 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2384 } 2385 2386 /* create our matrix */ 2387 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2388 CHKERRQ(ierr); 2389 A = *newmat; 2390 MatSetOption(A,MAT_COLUMNS_SORTED); 2391 2392 if (!rank) { 2393 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2394 /* read in my part of the matrix numerical values */ 2395 nz = procsnz[0]; 2396 vals = buf; 2397 mycols = ibuf; 2398 if (size == 1) nz -= extra_rows; 2399 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2400 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2401 2402 /* insert into matrix */ 2403 jj = rstart*bs; 2404 for ( i=0; i<m; i++ ) { 2405 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2406 mycols += locrowlens[i]; 2407 vals += locrowlens[i]; 2408 jj++; 2409 } 2410 /* read in other processors (except the last one) and ship out */ 2411 for ( i=1; i<size-1; i++ ) { 2412 nz = procsnz[i]; 2413 vals = buf; 2414 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2415 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2416 } 2417 /* the last proc */ 2418 if ( size != 1 ){ 2419 nz = procsnz[i] - extra_rows; 2420 vals = buf; 2421 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2422 for ( i=0; 
i<extra_rows; i++ ) vals[nz+i] = 1.0; 2423 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2424 } 2425 PetscFree(procsnz); 2426 } else { 2427 /* receive numeric values */ 2428 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2429 2430 /* receive message of values*/ 2431 vals = buf; 2432 mycols = ibuf; 2433 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2434 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2435 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2436 2437 /* insert into matrix */ 2438 jj = rstart*bs; 2439 for ( i=0; i<m; i++ ) { 2440 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2441 mycols += locrowlens[i]; 2442 vals += locrowlens[i]; 2443 jj++; 2444 } 2445 } 2446 PetscFree(locrowlens); 2447 PetscFree(buf); 2448 PetscFree(ibuf); 2449 PetscFree(rowners); 2450 PetscFree(dlens); 2451 PetscFree(mask); 2452 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2453 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 2458 2459 #undef __FUNC__ 2460 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2461 /*@ 2462 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2463 2464 Input Parameters: 2465 . mat - the matrix 2466 . fact - factor 2467 2468 Collective on Mat 2469 2470 Level: advanced 2471 2472 Notes: 2473 This can also be set by the command line option: -mat_use_hash_table fact 2474 2475 .keywords: matrix, hashtable, factor, HT 2476 2477 .seealso: MatSetOption() 2478 @*/ 2479 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2480 { 2481 Mat_MPIBAIJ *baij; 2482 2483 PetscFunctionBegin; 2484 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2485 if (mat->type != MATMPIBAIJ) { 2486 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. 
Use MPIBAIJ only."); 2487 } 2488 baij = (Mat_MPIBAIJ*) mat->data; 2489 baij->ht_fact = fact; 2490 PetscFunctionReturn(0); 2491 } 2492