1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.140 1999/01/08 16:55:04 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. This is done in a non scable way since the 18 length of colmap equals the global matrix length. 19 */ 20 #undef __FUNC__ 21 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 22 static int CreateColmap_MPIBAIJ_Private(Mat mat) 23 { 24 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 25 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 26 int nbs = B->nbs,i,bs=B->bs,ierr; 27 28 PetscFunctionBegin; 29 #if defined (USE_CTABLE) 30 ierr = TableCreate( &baij->colmap, baij->nbs/5 ); CHKERRQ(ierr); 31 for ( i=0; i<nbs; i++ ){ 32 ierr = TableAdd( baij->colmap, baij->garray[i] + 1, i*bs+1 );CHKERRQ(ierr); 33 } 34 #else 35 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 36 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 37 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 38 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 10 44 45 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 brow = row/bs; \ 49 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 50 rmax = aimax[brow]; nrow = ailen[brow]; \ 51 bcol = col/bs; \ 52 ridx = row % bs; cidx = col % bs; \ 53 low = 0; high = nrow; \ 54 while (high-low > 3) { \ 55 t = (low+high)/2; \ 56 if (rp[t] > bcol) high = t; \ 57 else low = t; \ 58 } \ 59 for ( _i=low; _i<high; _i++ ) { \ 60 if (rp[_i] > bcol) break; \ 61 if (rp[_i] == bcol) { \ 62 bap = ap + bs2*_i + bs*cidx + ridx; \ 63 if (addv == ADD_VALUES) *bap += value; \ 64 else *bap = value; \ 65 goto a_noinsert; \ 66 } \ 67 } \ 68 if (a->nonew == 1) goto a_noinsert; \ 69 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 70 if (nrow >= rmax) { \ 71 /* there is no extra room in row, therefore enlarge */ \ 72 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 73 Scalar *new_a; \ 74 \ 75 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 76 \ 77 /* malloc new storage space */ \ 78 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 79 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 80 new_j = (int *) (new_a + bs2*new_nz); \ 81 new_i = new_j + new_nz; \ 82 \ 83 /* copy over old data into new slots */ \ 84 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 85 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 86 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 87 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 88 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 89 len*sizeof(int)); \ 90 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 91 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 92 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 93 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 94 /* free up old matrix storage */ \ 95 PetscFree(a->a); \ 96 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = 1; \ 99 \ 100 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 101 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 102 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 103 a->maxnz += bs2*CHUNKSIZE; \ 104 a->reallocs++; \ 105 a->nz++; \ 106 } \ 107 N = nrow++ - 1; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 112 } \ 113 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 114 rp[_i] = bcol; \ 115 ap[bs2*_i + bs*cidx + ridx] = value; \ 116 a_noinsert:; \ 117 ailen[brow] = nrow; \ 118 } 119 120 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 121 { \ 122 \ 123 brow = row/bs; \ 124 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 125 rmax = bimax[brow]; nrow = bilen[brow]; \ 126 bcol = col/bs; \ 127 ridx = row % bs; cidx = col % bs; \ 128 low = 0; high = nrow; \ 129 while (high-low > 3) { \ 130 t = (low+high)/2; \ 131 if (rp[t] > bcol) high = t; \ 132 else low = t; \ 133 } \ 134 for ( _i=low; _i<high; _i++ ) { \ 135 if (rp[_i] > bcol) break; \ 136 if (rp[_i] == bcol) { \ 137 bap = ap + bs2*_i + bs*cidx + ridx; \ 138 if (addv == ADD_VALUES) *bap += value; \ 139 else *bap = value; \ 140 goto b_noinsert; \ 141 } \ 142 } \ 143 if (b->nonew == 1) goto b_noinsert; \ 144 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 145 if (nrow >= rmax) { \ 146 /* there is no extra room in row, therefore enlarge */ \ 147 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 148 Scalar *new_a; \ 149 \ 150 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 151 \ 152 /* malloc new storage space */ \ 153 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 154 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 155 new_j = (int *) (new_a + bs2*new_nz); \ 156 new_i = new_j + new_nz; \ 157 \ 158 /* copy over old data into new slots */ \ 159 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 160 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 161 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 162 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 163 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 164 len*sizeof(int)); \ 165 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 166 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 167 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 168 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 169 /* free up old matrix storage */ \ 170 PetscFree(b->a); \ 171 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = 1; \ 174 \ 175 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 178 b->maxnz += bs2*CHUNKSIZE; \ 179 b->reallocs++; \ 180 b->nz++; \ 181 } \ 182 N = nrow++ - 1; \ 183 /* shift up all the later entries in this row */ \ 184 for ( ii=N; ii>=_i; ii-- ) { \ 185 rp[ii+1] = rp[ii]; \ 186 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 187 } \ 188 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 189 rp[_i] = bcol; \ 190 ap[bs2*_i + bs*cidx + ridx] = value; \ 191 b_noinsert:; \ 192 bilen[brow] = nrow; \ 193 } 194 195 #undef __FUNC__ 196 #define __FUNC__ "MatSetValues_MPIBAIJ" 197 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 198 { 199 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 200 Scalar value; 201 int ierr,i,j,row,col; 202 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 203 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 204 int cend_orig=baij->cend_bs,bs=baij->bs; 205 206 /* Some Variables required in the macro */ 207 Mat A = baij->A; 208 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 209 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 210 Scalar *aa=a->a; 211 212 Mat B = baij->B; 213 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 214 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 215 Scalar *ba=b->a; 216 217 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 218 int low,high,t,ridx,cidx,bs2=a->bs2; 219 Scalar *ap,*bap; 220 221 PetscFunctionBegin; 222 for ( i=0; i<m; i++ ) { 223 #if defined(USE_PETSC_BOPT_g) 224 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 225 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 226 #endif 227 if (im[i] >= rstart_orig && im[i] < rend_orig) { 228 row = im[i] - rstart_orig; 229 for ( j=0; j<n; j++ ) { 230 if (in[j] >= cstart_orig && in[j] < cend_orig){ 231 col = in[j] - cstart_orig; 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 234 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 235 } 236 #if defined(USE_PETSC_BOPT_g) 237 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 238 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 239 #endif 240 else { 241 if (mat->was_assembled) { 242 if (!baij->colmap) { 243 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 244 } 245 #if defined (USE_CTABLE) 246 col = TableFind( baij->colmap, in[j]/bs + 1 ) - 1 + in[j]%bs; 247 #else 248 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 249 #endif 250 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 251 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 252 col = in[j]; 253 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 254 B = baij->B; 255 b = (Mat_SeqBAIJ *) (B)->data; 256 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 257 ba=b->a; 258 } 259 } else col = in[j]; 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (roworiented && !baij->donotstash) { 267 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 268 } else { 269 if (!baij->donotstash) { 270 row = im[i]; 271 for ( j=0; j<n; j++ ) { 272 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 } 278 PetscFunctionReturn(0); 279 } 280 281 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 282 #undef __FUNC__ 283 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 284 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 285 { 286 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 287 Scalar *value,*barray=baij->barray; 288 int ierr,i,j,ii,jj,row,col,k,l; 289 int roworiented = baij->roworiented,rstart=baij->rstart ; 290 int rend=baij->rend,cstart=baij->cstart,stepval; 291 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 292 293 if(!barray) { 294 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 295 } 296 297 if (roworiented) { 298 stepval = (n-1)*bs; 299 } else { 300 stepval = (m-1)*bs; 301 } 302 for ( i=0; i<m; i++ ) { 303 #if defined(USE_PETSC_BOPT_g) 304 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 305 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 306 #endif 307 if (im[i] >= rstart && im[i] < rend) { 308 row = im[i] - rstart; 309 for ( j=0; j<n; j++ ) { 310 /* If NumCol = 1 then a copy is not required */ 311 if ((roworiented) && (n == 1)) { 312 barray = v + i*bs2; 313 } else if((!roworiented) && (m == 1)) { 314 barray = v + j*bs2; 315 } else { /* Here a copy is required */ 316 if (roworiented) { 317 value = v + i*(stepval+bs)*bs + j*bs; 318 } else { 319 value = v + j*(stepval+bs)*bs + i*bs; 320 } 321 for ( ii=0; ii<bs; ii++,value+=stepval ) { 322 for (jj=0; jj<bs; jj++ ) { 323 *barray++ = *value++; 324 } 325 } 326 barray -=bs2; 327 } 328 329 if (in[j] >= cstart && in[j] < cend){ 330 col = in[j] - cstart; 331 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 332 } 333 #if defined(USE_PETSC_BOPT_g) 334 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 335 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 336 #endif 337 else { 338 if (mat->was_assembled) { 339 if (!baij->colmap) { 340 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 341 } 342 343 #if defined(USE_PETSC_BOPT_g) 344 #if defined (USE_CTABLE) 345 if( (TableFind( baij->colmap, in[j] + 1 ) - 1) % bs) 346 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 349 #endif 350 #endif 351 #if defined (USE_CTABLE) 352 col = (TableFind( baij->colmap, in[j] + 1 ) - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 358 col = in[j]; 359 } 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (!baij->donotstash) { 367 if (roworiented ) { 368 row = im[i]*bs; 369 value = v + i*(stepval+bs)*bs; 370 for ( j=0; j<bs; j++,row++ ) { 371 for ( k=0; k<n; k++ ) { 372 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 373 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 374 } 375 } 376 } 377 } else { 378 for ( j=0; j<n; j++ ) { 379 value = v + j*(stepval+bs)*bs + i*bs; 380 col = in[j]*bs; 381 for ( k=0; k<bs; k++,col++,value+=stepval) { 382 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 383 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 384 } 385 } 386 } 387 } 388 } 389 } 390 } 391 PetscFunctionReturn(0); 392 } 393 #define HASH_KEY 0.6180339887 394 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 395 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 396 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 397 #undef __FUNC__ 398 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 399 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 400 { 401 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 402 int ierr,i,j,row,col; 403 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 404 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 405 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 406 double tmp; 407 Scalar ** HD = baij->hd,value; 408 #if defined(USE_PETSC_BOPT_g) 409 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 410 #endif 411 412 PetscFunctionBegin; 413 414 for ( i=0; i<m; i++ ) { 415 #if defined(USE_PETSC_BOPT_g) 416 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 417 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 418 #endif 419 row = im[i]; 420 if (row >= rstart_orig && row < rend_orig) { 421 for ( j=0; j<n; j++ ) { 422 col = in[j]; 423 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 424 /* Look up into the Hash Table */ 425 key = (row/bs)*Nbs+(col/bs)+1; 426 h1 = HASH(size,key,tmp); 427 428 429 idx = h1; 430 #if defined(USE_PETSC_BOPT_g) 431 insert_ct++; 432 total_ct++; 433 if (HT[idx] != key) { 434 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 435 if (idx == size) { 436 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 437 if (idx == h1) { 438 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 439 } 440 } 441 } 442 #else 443 if (HT[idx] != key) { 444 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 445 if (idx == size) { 446 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 447 if (idx == h1) { 448 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 449 } 450 } 451 } 452 #endif 453 /* A HASH table entry is found, so insert the values at the correct address */ 454 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 455 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 456 } 457 } else { 458 if (roworiented && !baij->donotstash) { 459 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 460 } else { 461 if (!baij->donotstash) { 462 row = im[i]; 463 for ( j=0; j<n; j++ ) { 464 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 465 } 466 } 467 } 468 } 469 } 470 #if defined(USE_PETSC_BOPT_g) 471 baij->ht_total_ct = total_ct; 472 baij->ht_insert_ct = insert_ct; 473 #endif 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNC__ 478 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 479 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 480 { 481 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 482 int ierr,i,j,ii,jj,row,col,k,l; 483 int roworiented = baij->roworiented,rstart=baij->rstart ; 484 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 485 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 486 double tmp; 487 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 488 #if defined(USE_PETSC_BOPT_g) 489 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 490 #endif 491 492 PetscFunctionBegin; 493 494 if (roworiented) { 495 stepval = (n-1)*bs; 496 } else { 497 stepval = (m-1)*bs; 498 } 499 for ( i=0; i<m; i++ ) { 500 #if defined(USE_PETSC_BOPT_g) 501 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 502 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 503 #endif 504 row = im[i]; 505 v_t = v + i*bs2; 506 if (row >= rstart && row < rend) { 507 for ( j=0; j<n; j++ ) { 508 col = in[j]; 509 510 /* Look up into the Hash Table */ 511 key = row*Nbs+col+1; 512 h1 = HASH(size,key,tmp); 513 514 idx = h1; 515 #if defined(USE_PETSC_BOPT_g) 516 total_ct++; 517 insert_ct++; 518 if (HT[idx] != key) { 519 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 520 if (idx == size) { 521 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 522 if (idx == h1) { 523 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 524 } 525 } 526 } 527 #else 528 if (HT[idx] != key) { 529 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 530 if (idx == size) { 531 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 532 if (idx == h1) { 533 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 534 } 535 } 536 } 537 #endif 538 baij_a = HD[idx]; 539 if (roworiented) { 540 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 541 /* value = v + (i*(stepval+bs)+j)*bs; */ 542 value = v_t; 543 v_t += bs; 544 if (addv == ADD_VALUES) { 545 for ( ii=0; ii<bs; ii++,value+=stepval) { 546 for ( jj=ii; jj<bs2; jj+=bs ) { 547 baij_a[jj] += *value++; 548 } 549 } 550 } else { 551 for ( ii=0; ii<bs; ii++,value+=stepval) { 552 for ( jj=ii; jj<bs2; jj+=bs ) { 553 baij_a[jj] = *value++; 554 } 555 } 556 } 557 } else { 558 value = v + j*(stepval+bs)*bs + i*bs; 559 if (addv == ADD_VALUES) { 560 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 561 for ( jj=0; jj<bs; jj++ ) { 562 baij_a[jj] += *value++; 563 } 564 } 565 } else { 566 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 567 for ( jj=0; jj<bs; jj++ ) { 568 baij_a[jj] = *value++; 569 } 570 } 571 } 572 } 573 } 574 } else { 575 if (!baij->donotstash) { 576 if (roworiented ) { 577 row = im[i]*bs; 578 value = v + i*(stepval+bs)*bs; 579 for ( j=0; j<bs; j++,row++ ) { 580 for ( k=0; k<n; k++ ) { 581 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 582 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 583 } 584 } 585 } 586 } else { 587 for ( j=0; j<n; j++ ) { 588 value = v + j*(stepval+bs)*bs + i*bs; 589 col = in[j]*bs; 590 for ( k=0; k<bs; k++,col++,value+=stepval) { 591 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 592 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 593 } 594 } 595 } 596 } 597 } 598 } 599 } 600 #if defined(USE_PETSC_BOPT_g) 601 baij->ht_total_ct = total_ct; 602 baij->ht_insert_ct = insert_ct; 603 #endif 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNC__ 608 #define __FUNC__ "MatGetValues_MPIBAIJ" 609 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 612 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 613 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 614 615 PetscFunctionBegin; 616 for ( i=0; i<m; i++ ) { 617 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 618 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 619 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 620 row = idxm[i] - bsrstart; 621 for ( j=0; j<n; j++ ) { 622 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 623 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 624 if (idxn[j] >= bscstart && idxn[j] < bscend){ 625 col = idxn[j] - bscstart; 626 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 627 } else { 628 if (!baij->colmap) { 629 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 630 } 631 #if defined (USE_CTABLE) 632 data = TableFind( baij->colmap, idxn[j]/bs + 1 ) - 1; 633 #else 634 data = baij->colmap[idxn[j]/bs]-1; 635 #endif 636 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 637 else { 638 col = data + idxn[j]%bs; 639 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 640 } 641 } 642 } 643 } else { 644 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 645 } 646 } 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNC__ 651 #define __FUNC__ "MatNorm_MPIBAIJ" 652 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 655 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 656 int ierr, i,bs2=baij->bs2; 657 double sum = 0.0; 658 Scalar *v; 659 660 PetscFunctionBegin; 661 if (baij->size == 1) { 662 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 663 } else { 664 if (type == NORM_FROBENIUS) { 665 v = amat->a; 666 for (i=0; i<amat->nz*bs2; i++ ) { 667 #if defined(USE_PETSC_COMPLEX) 668 sum += PetscReal(PetscConj(*v)*(*v)); v++; 669 #else 670 sum += (*v)*(*v); v++; 671 #endif 672 } 673 v = bmat->a; 674 for (i=0; i<bmat->nz*bs2; i++ ) { 675 #if defined(USE_PETSC_COMPLEX) 676 sum += PetscReal(PetscConj(*v)*(*v)); v++; 677 #else 678 sum += (*v)*(*v); v++; 679 #endif 680 } 681 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 682 *norm = sqrt(*norm); 683 } else { 684 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 685 } 686 } 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNC__ 691 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 692 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 695 MPI_Comm comm = mat->comm; 696 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 697 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 698 MPI_Request *send_waits,*recv_waits; 699 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 700 InsertMode addv; 701 Scalar *rvalues,*svalues; 702 703 PetscFunctionBegin; 704 if (baij->donotstash) { 705 baij->svalues = 0; baij->rvalues = 0; 706 baij->nsends = 0; baij->nrecvs = 0; 707 baij->send_waits = 0; baij->recv_waits = 0; 708 baij->rmax = 0; 709 PetscFunctionReturn(0); 710 } 711 712 /* make sure all processors are either in INSERTMODE or ADDMODE */ 713 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 714 if (addv == (ADD_VALUES|INSERT_VALUES)) { 715 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 716 } 717 mat->insertmode = addv; /* in case this processor had no cache */ 718 719 /* first count number of contributors to each processor */ 720 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 721 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 722 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 723 for ( i=0; i<baij->stash.n; i++ ) { 724 idx = baij->stash.idx[i]; 725 for ( j=0; j<size; j++ ) { 726 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 727 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 728 } 729 } 730 } 731 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 732 733 /* inform other processors of number of messages and max length*/ 734 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 735 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 736 nreceives = work[rank]; 737 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 738 nmax = work[rank]; 739 PetscFree(work); 740 741 /* post receives: 742 1) each message will consist of ordered pairs 743 (global index,value) we store the global index as a double 744 to simplify the message passing. 745 2) since we don't know how long each individual message is we 746 allocate the largest needed buffer for each receive. Potentially 747 this is a lot of wasted space. 748 749 750 This could be done better. 751 */ 752 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 753 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 754 for ( i=0; i<nreceives; i++ ) { 755 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 756 } 757 758 /* do sends: 759 1) starts[i] gives the starting index in svalues for stuff going to 760 the ith processor 761 */ 762 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 763 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 764 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 765 starts[0] = 0; 766 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 767 for ( i=0; i<baij->stash.n; i++ ) { 768 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 769 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 770 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 771 } 772 PetscFree(owner); 773 starts[0] = 0; 774 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 775 count = 0; 776 for ( i=0; i<size; i++ ) { 777 if (procs[i]) { 778 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 779 } 780 } 781 PetscFree(starts); PetscFree(nprocs); 782 783 /* Free cache space */ 784 PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 785 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 786 787 baij->svalues = svalues; baij->rvalues = rvalues; 788 baij->nsends = nsends; baij->nrecvs = nreceives; 789 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 790 baij->rmax = nmax; 791 792 PetscFunctionReturn(0); 793 } 794 795 /* 796 Creates the hash table, and sets the table 797 This table is created only once. 798 If new entried need to be added to the matrix 799 then the hash table has to be destroyed and 800 recreated. 801 */ 802 #undef __FUNC__ 803 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 804 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 805 { 806 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 807 Mat A = baij->A, B=baij->B; 808 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 809 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 810 int size,bs2=baij->bs2,rstart=baij->rstart; 811 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 812 int *HT,key; 813 Scalar **HD; 814 double tmp; 815 #if defined(USE_PETSC_BOPT_g) 816 int ct=0,max=0; 817 #endif 818 819 PetscFunctionBegin; 820 baij->ht_size=(int)(factor*nz); 821 size = baij->ht_size; 822 823 if (baij->ht) { 824 PetscFunctionReturn(0); 825 } 826 827 /* Allocate Memory for Hash Table */ 828 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 829 baij->ht = (int*)(baij->hd + size); 830 HD = baij->hd; 831 HT = baij->ht; 832 833 834 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 835 836 837 /* Loop Over A */ 838 for ( i=0; i<a->mbs; i++ ) { 839 for ( j=ai[i]; j<ai[i+1]; j++ ) { 840 row = i+rstart; 841 col = aj[j]+cstart; 842 843 key = row*Nbs + col + 1; 844 h1 = HASH(size,key,tmp); 845 for ( k=0; k<size; k++ ){ 846 if (HT[(h1+k)%size] == 0.0) { 847 HT[(h1+k)%size] = key; 848 HD[(h1+k)%size] = a->a + j*bs2; 849 break; 850 #if defined(USE_PETSC_BOPT_g) 851 } else { 852 ct++; 853 #endif 854 } 855 } 856 #if defined(USE_PETSC_BOPT_g) 857 if (k> max) max = k; 858 #endif 859 } 860 } 861 /* Loop Over B */ 862 for ( i=0; i<b->mbs; i++ ) { 863 for ( j=bi[i]; j<bi[i+1]; j++ ) { 864 row = i+rstart; 865 col = garray[bj[j]]; 866 key = row*Nbs + col + 1; 867 h1 = HASH(size,key,tmp); 868 for ( k=0; k<size; k++ ){ 869 if (HT[(h1+k)%size] == 0.0) { 870 HT[(h1+k)%size] = key; 871 HD[(h1+k)%size] = b->a + j*bs2; 872 break; 873 #if defined(USE_PETSC_BOPT_g) 874 } else { 875 ct++; 876 #endif 877 } 878 } 879 #if defined(USE_PETSC_BOPT_g) 880 if (k> max) max = k; 881 #endif 882 } 883 } 884 885 /* Print Summary */ 886 #if defined(USE_PETSC_BOPT_g) 887 for ( i=0,j=0; i<size; i++) 888 if (HT[i]) {j++;} 889 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 890 (j== 0)? 0.0:((double)(ct+j))/j,max); 891 #endif 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNC__ 896 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 897 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 898 { 899 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 900 MPI_Status *send_status,recv_status; 901 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 902 int bs=baij->bs,row,col,other_disassembled; 903 Scalar *values,val; 904 InsertMode addv = mat->insertmode; 905 906 PetscFunctionBegin; 907 /* wait on receives */ 908 while (count) { 909 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 910 /* unpack receives into our local space */ 911 values = baij->rvalues + 3*imdex*baij->rmax; 912 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 913 n = n/3; 914 for ( i=0; i<n; i++ ) { 915 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 916 col = (int) PetscReal(values[3*i+1]); 917 val = values[3*i+2]; 918 if (col >= baij->cstart*bs && col < baij->cend*bs) { 919 col -= baij->cstart*bs; 920 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 921 } else { 922 if (mat->was_assembled) { 923 if (!baij->colmap) { 924 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 925 } 926 #if defined (USE_CTABLE) 927 col = TableFind( baij->colmap, col/bs + 1 ) - 1 + col%bs; 928 #else 929 col = (baij->colmap[col/bs]) - 1 + col%bs; 930 #endif 931 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 932 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 933 col = (int) PetscReal(values[3*i+1]); 934 } 935 } 936 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 937 } 938 } 939 count--; 940 } 941 if (baij->recv_waits) PetscFree(baij->recv_waits); 942 if (baij->rvalues) PetscFree(baij->rvalues); 943 944 /* wait on sends */ 945 if (baij->nsends) { 946 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 947 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 948 PetscFree(send_status); 949 } 950 if (baij->send_waits) PetscFree(baij->send_waits); 951 if (baij->svalues) PetscFree(baij->svalues); 952 953 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 954 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 955 956 /* determine if any processor has disassembled, if so we must 957 also disassemble ourselfs, in order that we may reassemble. */ 958 /* 959 if nonzero structure of submatrix B cannot change then we know that 960 no processor disassembled thus we can skip this stuff 961 */ 962 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 963 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 964 if (mat->was_assembled && !other_disassembled) { 965 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 966 } 967 } 968 969 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 970 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 971 } 972 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 973 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 974 975 #if defined(USE_PETSC_BOPT_g) 976 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 977 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 978 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 979 baij->ht_total_ct = 0; 980 baij->ht_insert_ct = 0; 981 } 982 #endif 983 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 984 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 985 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 986 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 987 } 988 989 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNC__ 994 #define __FUNC__ "MatView_MPIBAIJ_Binary" 995 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 996 { 997 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 998 int ierr; 999 1000 PetscFunctionBegin; 1001 if (baij->size == 1) { 1002 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1003 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNC__ 1008 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 1009 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 1010 { 1011 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1012 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 1013 FILE *fd; 1014 ViewerType vtype; 1015 1016 PetscFunctionBegin; 1017 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1018 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 1019 ierr = ViewerGetFormat(viewer,&format); 1020 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1021 MatInfo info; 1022 MPI_Comm_rank(mat->comm,&rank); 1023 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1024 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 1025 PetscSequentialPhaseBegin(mat->comm,1); 1026 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1027 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1028 baij->bs,(int)info.memory); 1029 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 1030 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1031 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 1032 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1033 fflush(fd); 1034 PetscSequentialPhaseEnd(mat->comm,1); 1035 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1038 PetscPrintf(mat->comm," block size is %d\n",bs); 1039 PetscFunctionReturn(0); 1040 } 1041 } 1042 1043 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 1044 Draw draw; 1045 PetscTruth isnull; 1046 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 1047 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1048 } 1049 1050 if (size == 1) { 1051 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1052 } else { 1053 /* assemble the entire matrix onto first processor. */ 1054 Mat A; 1055 Mat_SeqBAIJ *Aloc; 1056 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 1057 int mbs = baij->mbs; 1058 Scalar *a; 1059 1060 if (!rank) { 1061 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1062 } else { 1063 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1064 } 1065 PLogObjectParent(mat,A); 1066 1067 /* copy over the A part */ 1068 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1069 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1070 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1071 1072 for ( i=0; i<mbs; i++ ) { 1073 rvals[0] = bs*(baij->rstart + i); 1074 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1075 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1076 col = (baij->cstart+aj[j])*bs; 1077 for (k=0; k<bs; k++ ) { 1078 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1079 col++; a += bs; 1080 } 1081 } 1082 } 1083 /* copy over the B part */ 1084 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1085 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1086 for ( i=0; i<mbs; i++ ) { 1087 rvals[0] = bs*(baij->rstart + i); 1088 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1089 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1090 col = baij->garray[aj[j]]*bs; 1091 for (k=0; k<bs; k++ ) { 1092 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1093 col++; a += bs; 1094 } 1095 } 1096 } 1097 PetscFree(rvals); 1098 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1099 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1100 /* 1101 Everyone has to call to draw the matrix since the graphics waits are 1102 synchronized across all processors that share the Draw object 1103 */ 1104 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1105 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1106 } 1107 ierr = MatDestroy(A); CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatView_MPIBAIJ" 1116 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1117 { 1118 int ierr; 1119 ViewerType vtype; 1120 1121 PetscFunctionBegin; 1122 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1123 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1124 PetscTypeCompare(vtype,MATLAB_VIEWER)) { 1125 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1126 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1127 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1128 } else { 1129 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1130 } 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNC__ 1135 #define __FUNC__ "MatDestroy_MPIBAIJ" 1136 int MatDestroy_MPIBAIJ(Mat mat) 1137 { 1138 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 if (--mat->refct > 0) PetscFunctionReturn(0); 1143 1144 if (mat->mapping) { 1145 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1146 } 1147 if (mat->bmapping) { 1148 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1149 } 1150 if (mat->rmap) { 1151 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1152 } 1153 if (mat->cmap) { 1154 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1155 } 1156 #if defined(USE_PETSC_LOG) 1157 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1158 #endif 1159 1160 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1161 PetscFree(baij->rowners); 1162 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1163 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1164 #if defined (USE_CTABLE) 1165 if (baij->colmap) TableDelete(baij->colmap); 1166 #else 1167 if (baij->colmap) PetscFree(baij->colmap); 1168 #endif 1169 if (baij->garray) PetscFree(baij->garray); 1170 if (baij->lvec) VecDestroy(baij->lvec); 1171 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1172 if (baij->rowvalues) PetscFree(baij->rowvalues); 1173 if (baij->barray) PetscFree(baij->barray); 1174 if (baij->hd) PetscFree(baij->hd); 1175 PetscFree(baij); 1176 PLogObjectDestroy(mat); 1177 PetscHeaderDestroy(mat); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNC__ 1182 #define __FUNC__ "MatMult_MPIBAIJ" 1183 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1184 { 1185 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1186 int ierr, nt; 1187 1188 PetscFunctionBegin; 1189 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1190 if (nt != a->n) { 1191 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1192 } 1193 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1194 if (nt != a->m) { 1195 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1196 } 1197 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1198 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1199 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1200 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1201 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1207 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1208 { 1209 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1210 int ierr; 1211 1212 PetscFunctionBegin; 1213 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1214 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1215 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1216 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNC__ 1221 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1222 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1225 int ierr; 1226 1227 PetscFunctionBegin; 1228 /* do nondiagonal part */ 1229 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1230 /* send it on its way */ 1231 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1232 /* do local part */ 1233 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1234 /* receive remote parts: note this assumes the values are not actually */ 1235 /* inserted in yy until the next line, which is true for my implementation*/ 1236 /* but is not perhaps always true. */ 1237 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNC__ 1242 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1243 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1244 { 1245 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1246 int ierr; 1247 1248 PetscFunctionBegin; 1249 /* do nondiagonal part */ 1250 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1251 /* send it on its way */ 1252 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1253 /* do local part */ 1254 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1255 /* receive remote parts: note this assumes the values are not actually */ 1256 /* inserted in yy until the next line, which is true for my implementation*/ 1257 /* but is not perhaps always true. */ 1258 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 /* 1263 This only works correctly for square matrices where the subblock A->A is the 1264 diagonal block 1265 */ 1266 #undef __FUNC__ 1267 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1268 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1269 { 1270 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1271 int ierr; 1272 1273 PetscFunctionBegin; 1274 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1275 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNC__ 1280 #define __FUNC__ "MatScale_MPIBAIJ" 1281 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1282 { 1283 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1284 int ierr; 1285 1286 PetscFunctionBegin; 1287 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1288 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNC__ 1293 #define __FUNC__ "MatGetSize_MPIBAIJ" 1294 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1295 { 1296 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1297 1298 PetscFunctionBegin; 1299 if (m) *m = mat->M; 1300 if (n) *n = mat->N; 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNC__ 1305 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1306 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1307 { 1308 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1309 1310 PetscFunctionBegin; 1311 *m = mat->m; *n = mat->n; 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNC__ 1316 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1317 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1318 { 1319 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1320 1321 PetscFunctionBegin; 1322 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1323 PetscFunctionReturn(0); 1324 } 1325 1326 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1327 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1328 1329 #undef __FUNC__ 1330 #define __FUNC__ "MatGetRow_MPIBAIJ" 1331 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1332 { 1333 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1334 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1335 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1336 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1337 int *cmap, *idx_p,cstart = mat->cstart; 1338 1339 PetscFunctionBegin; 1340 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1341 mat->getrowactive = PETSC_TRUE; 1342 1343 if (!mat->rowvalues && (idx || v)) { 1344 /* 1345 allocate enough space to hold information from the longest row. 1346 */ 1347 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1348 int max = 1,mbs = mat->mbs,tmp; 1349 for ( i=0; i<mbs; i++ ) { 1350 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1351 if (max < tmp) { max = tmp; } 1352 } 1353 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1354 CHKPTRQ(mat->rowvalues); 1355 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1356 } 1357 1358 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1359 lrow = row - brstart; 1360 1361 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1362 if (!v) {pvA = 0; pvB = 0;} 1363 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1364 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1365 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1366 nztot = nzA + nzB; 1367 1368 cmap = mat->garray; 1369 if (v || idx) { 1370 if (nztot) { 1371 /* Sort by increasing column numbers, assuming A and B already sorted */ 1372 int imark = -1; 1373 if (v) { 1374 *v = v_p = mat->rowvalues; 1375 for ( i=0; i<nzB; i++ ) { 1376 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1377 else break; 1378 } 1379 imark = i; 1380 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1381 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1382 } 1383 if (idx) { 1384 *idx = idx_p = mat->rowindices; 1385 if (imark > -1) { 1386 for ( i=0; i<imark; i++ ) { 1387 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1388 } 1389 } else { 1390 for ( i=0; i<nzB; i++ ) { 1391 if (cmap[cworkB[i]/bs] < cstart) 1392 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1393 else break; 1394 } 1395 imark = i; 1396 } 1397 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1398 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1399 } 1400 } else { 1401 if (idx) *idx = 0; 1402 if (v) *v = 0; 1403 } 1404 } 1405 *nz = nztot; 1406 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1407 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNC__ 1412 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1413 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1414 { 1415 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1416 1417 PetscFunctionBegin; 1418 if (baij->getrowactive == PETSC_FALSE) { 1419 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1420 } 1421 baij->getrowactive = PETSC_FALSE; 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNC__ 1426 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1427 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1428 { 1429 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1430 1431 PetscFunctionBegin; 1432 *bs = baij->bs; 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1438 int MatZeroEntries_MPIBAIJ(Mat A) 1439 { 1440 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1441 int ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1445 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNC__ 1450 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1451 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1452 { 1453 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1454 Mat A = a->A, B = a->B; 1455 int ierr; 1456 double isend[5], irecv[5]; 1457 1458 PetscFunctionBegin; 1459 info->block_size = (double)a->bs; 1460 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1461 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1462 isend[3] = info->memory; isend[4] = info->mallocs; 1463 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1464 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1465 isend[3] += info->memory; isend[4] += info->mallocs; 1466 if (flag == MAT_LOCAL) { 1467 info->nz_used = isend[0]; 1468 info->nz_allocated = isend[1]; 1469 info->nz_unneeded = isend[2]; 1470 info->memory = isend[3]; 1471 info->mallocs = isend[4]; 1472 } else if (flag == MAT_GLOBAL_MAX) { 1473 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1474 info->nz_used = irecv[0]; 1475 info->nz_allocated = irecv[1]; 1476 info->nz_unneeded = irecv[2]; 1477 info->memory = irecv[3]; 1478 info->mallocs = irecv[4]; 1479 } else if (flag == MAT_GLOBAL_SUM) { 1480 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1481 info->nz_used = irecv[0]; 1482 info->nz_allocated = irecv[1]; 1483 info->nz_unneeded = irecv[2]; 1484 info->memory = irecv[3]; 1485 info->mallocs = irecv[4]; 1486 } 1487 info->rows_global = (double)a->M; 1488 info->columns_global = (double)a->N; 1489 info->rows_local = (double)a->m; 1490 info->columns_local = (double)a->N; 1491 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1492 info->fill_ratio_needed = 0; 1493 info->factor_mallocs = 0; 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNC__ 1498 #define __FUNC__ "MatSetOption_MPIBAIJ" 1499 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1500 { 1501 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1502 1503 PetscFunctionBegin; 1504 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1505 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1506 op == MAT_COLUMNS_UNSORTED || 1507 op == MAT_COLUMNS_SORTED || 1508 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1509 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1510 MatSetOption(a->A,op); 1511 MatSetOption(a->B,op); 1512 } else if (op == MAT_ROW_ORIENTED) { 1513 a->roworiented = 1; 1514 MatSetOption(a->A,op); 1515 MatSetOption(a->B,op); 1516 } else if (op == MAT_ROWS_SORTED || 1517 op == MAT_ROWS_UNSORTED || 1518 op == MAT_SYMMETRIC || 1519 op == MAT_STRUCTURALLY_SYMMETRIC || 1520 op == MAT_YES_NEW_DIAGONALS || 1521 op == MAT_USE_HASH_TABLE) 1522 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1523 else if (op == MAT_COLUMN_ORIENTED) { 1524 a->roworiented = 0; 1525 MatSetOption(a->A,op); 1526 MatSetOption(a->B,op); 1527 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1528 a->donotstash = 1; 1529 } else if (op == MAT_NO_NEW_DIAGONALS) { 1530 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1531 } else if (op == MAT_USE_HASH_TABLE) { 1532 a->ht_flag = 1; 1533 } else { 1534 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNC__ 1540 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1541 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1542 { 1543 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1544 Mat_SeqBAIJ *Aloc; 1545 Mat B; 1546 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1547 int bs=baij->bs,mbs=baij->mbs; 1548 Scalar *a; 1549 1550 PetscFunctionBegin; 1551 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1552 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1553 CHKERRQ(ierr); 1554 1555 /* copy over the A part */ 1556 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1557 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1558 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1559 1560 for ( i=0; i<mbs; i++ ) { 1561 rvals[0] = bs*(baij->rstart + i); 1562 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1563 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1564 col = (baij->cstart+aj[j])*bs; 1565 for (k=0; k<bs; k++ ) { 1566 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1567 col++; a += bs; 1568 } 1569 } 1570 } 1571 /* copy over the B part */ 1572 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1573 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1574 for ( i=0; i<mbs; i++ ) { 1575 rvals[0] = bs*(baij->rstart + i); 1576 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1577 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1578 col = baij->garray[aj[j]]*bs; 1579 for (k=0; k<bs; k++ ) { 1580 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1581 col++; a += bs; 1582 } 1583 } 1584 } 1585 PetscFree(rvals); 1586 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1587 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1588 1589 if (matout != PETSC_NULL) { 1590 *matout = B; 1591 } else { 1592 PetscOps *Abops; 1593 MatOps Aops; 1594 1595 /* This isn't really an in-place transpose .... but free data structures from baij */ 1596 PetscFree(baij->rowners); 1597 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1598 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1599 if (baij->colmap) PetscFree(baij->colmap); 1600 if (baij->garray) PetscFree(baij->garray); 1601 if (baij->lvec) VecDestroy(baij->lvec); 1602 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1603 PetscFree(baij); 1604 1605 /* 1606 This is horrible, horrible code. We need to keep the 1607 A pointers for the bops and ops but copy everything 1608 else from C. 1609 */ 1610 Abops = A->bops; 1611 Aops = A->ops; 1612 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1613 A->bops = Abops; 1614 A->ops = Aops; 1615 1616 PetscHeaderDestroy(B); 1617 } 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #undef __FUNC__ 1622 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1623 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1624 { 1625 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1626 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1627 int ierr,s1,s2,s3; 1628 1629 PetscFunctionBegin; 1630 if (ll) { 1631 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1632 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1633 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1634 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1635 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1636 } 1637 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #undef __FUNC__ 1642 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1643 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1644 { 1645 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1646 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1647 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1648 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1649 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1650 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1651 MPI_Comm comm = A->comm; 1652 MPI_Request *send_waits,*recv_waits; 1653 MPI_Status recv_status,*send_status; 1654 IS istmp; 1655 PetscTruth localdiag; 1656 1657 PetscFunctionBegin; 1658 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1659 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1660 1661 /* first count number of contributors to each processor */ 1662 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1663 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1664 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1665 for ( i=0; i<N; i++ ) { 1666 idx = rows[i]; 1667 found = 0; 1668 for ( j=0; j<size; j++ ) { 1669 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1670 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1671 } 1672 } 1673 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1674 } 1675 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1676 1677 /* inform other processors of number of messages and max length*/ 1678 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1679 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1680 nrecvs = work[rank]; 1681 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1682 nmax = work[rank]; 1683 PetscFree(work); 1684 1685 /* post receives: */ 1686 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1687 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1688 for ( i=0; i<nrecvs; i++ ) { 1689 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1690 } 1691 1692 /* do sends: 1693 1) starts[i] gives the starting index in svalues for stuff going to 1694 the ith processor 1695 */ 1696 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1697 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1698 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1699 starts[0] = 0; 1700 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1701 for ( i=0; i<N; i++ ) { 1702 svalues[starts[owner[i]]++] = rows[i]; 1703 } 1704 ISRestoreIndices(is,&rows); 1705 1706 starts[0] = 0; 1707 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1708 count = 0; 1709 for ( i=0; i<size; i++ ) { 1710 if (procs[i]) { 1711 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1712 } 1713 } 1714 PetscFree(starts); 1715 1716 base = owners[rank]*bs; 1717 1718 /* wait on receives */ 1719 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1720 source = lens + nrecvs; 1721 count = nrecvs; slen = 0; 1722 while (count) { 1723 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1724 /* unpack receives into our local space */ 1725 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1726 source[imdex] = recv_status.MPI_SOURCE; 1727 lens[imdex] = n; 1728 slen += n; 1729 count--; 1730 } 1731 PetscFree(recv_waits); 1732 1733 /* move the data into the send scatter */ 1734 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1735 count = 0; 1736 for ( i=0; i<nrecvs; i++ ) { 1737 values = rvalues + i*nmax; 1738 for ( j=0; j<lens[i]; j++ ) { 1739 lrows[count++] = values[j] - base; 1740 } 1741 } 1742 PetscFree(rvalues); PetscFree(lens); 1743 PetscFree(owner); PetscFree(nprocs); 1744 1745 /* actually zap the local rows */ 1746 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1747 PLogObjectParent(A,istmp); 1748 1749 /* 1750 Zero the required rows. If the "diagonal block" of the matrix 1751 is square and the user wishes to set the diagonal we use seperate 1752 code so that MatSetValues() is not called for each diagonal allocating 1753 new memory, thus calling lots of mallocs and slowing things down. 1754 1755 Contributed by: Mathew Knepley 1756 */ 1757 localdiag = PETSC_FALSE; 1758 if (diag && (l->A->M == l->A->N)) { 1759 localdiag = PETSC_TRUE; 1760 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1761 } else { 1762 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1763 } 1764 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1765 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1766 1767 if (diag && (localdiag == PETSC_FALSE)) { 1768 for ( i = 0; i < slen; i++ ) { 1769 row = lrows[i] + rstart_bs; 1770 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1771 } 1772 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1773 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1774 } 1775 PetscFree(lrows); 1776 1777 /* wait on sends */ 1778 if (nsends) { 1779 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1780 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1781 PetscFree(send_status); 1782 } 1783 PetscFree(send_waits); PetscFree(svalues); 1784 1785 PetscFunctionReturn(0); 1786 } 1787 1788 extern int MatPrintHelp_SeqBAIJ(Mat); 1789 #undef __FUNC__ 1790 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1791 int MatPrintHelp_MPIBAIJ(Mat A) 1792 { 1793 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1794 MPI_Comm comm = A->comm; 1795 static int called = 0; 1796 int ierr; 1797 1798 PetscFunctionBegin; 1799 if (!a->rank) { 1800 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1801 } 1802 if (called) {PetscFunctionReturn(0);} else called = 1; 1803 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1804 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNC__ 1809 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1810 int MatSetUnfactored_MPIBAIJ(Mat A) 1811 { 1812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1813 int ierr; 1814 1815 PetscFunctionBegin; 1816 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1817 PetscFunctionReturn(0); 1818 } 1819 1820 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1821 1822 /* -------------------------------------------------------------------*/ 1823 static struct _MatOps MatOps_Values = { 1824 MatSetValues_MPIBAIJ, 1825 MatGetRow_MPIBAIJ, 1826 MatRestoreRow_MPIBAIJ, 1827 MatMult_MPIBAIJ, 1828 MatMultAdd_MPIBAIJ, 1829 MatMultTrans_MPIBAIJ, 1830 MatMultTransAdd_MPIBAIJ, 1831 0, 1832 0, 1833 0, 1834 0, 1835 0, 1836 0, 1837 0, 1838 MatTranspose_MPIBAIJ, 1839 MatGetInfo_MPIBAIJ, 1840 0, 1841 MatGetDiagonal_MPIBAIJ, 1842 MatDiagonalScale_MPIBAIJ, 1843 MatNorm_MPIBAIJ, 1844 MatAssemblyBegin_MPIBAIJ, 1845 MatAssemblyEnd_MPIBAIJ, 1846 0, 1847 MatSetOption_MPIBAIJ, 1848 MatZeroEntries_MPIBAIJ, 1849 MatZeroRows_MPIBAIJ, 1850 0, 1851 0, 1852 0, 1853 0, 1854 MatGetSize_MPIBAIJ, 1855 MatGetLocalSize_MPIBAIJ, 1856 MatGetOwnershipRange_MPIBAIJ, 1857 0, 1858 0, 1859 0, 1860 0, 1861 MatDuplicate_MPIBAIJ, 1862 0, 1863 0, 1864 0, 1865 0, 1866 0, 1867 MatGetSubMatrices_MPIBAIJ, 1868 MatIncreaseOverlap_MPIBAIJ, 1869 MatGetValues_MPIBAIJ, 1870 0, 1871 MatPrintHelp_MPIBAIJ, 1872 MatScale_MPIBAIJ, 1873 0, 1874 0, 1875 0, 1876 MatGetBlockSize_MPIBAIJ, 1877 0, 1878 0, 1879 0, 1880 0, 1881 0, 1882 0, 1883 MatSetUnfactored_MPIBAIJ, 1884 0, 1885 MatSetValuesBlocked_MPIBAIJ, 1886 0, 1887 0, 1888 0, 1889 MatGetMaps_Petsc}; 1890 1891 #include "pc.h" 1892 EXTERN_C_BEGIN 1893 extern int PCSetUp_BJacobi_BAIJ(PC); 1894 EXTERN_C_END 1895 1896 #undef __FUNC__ 1897 #define __FUNC__ "MatCreateMPIBAIJ" 1898 /*@C 1899 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1900 (block compressed row). For good matrix assembly performance 1901 the user should preallocate the matrix storage by setting the parameters 1902 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1903 performance can be increased by more than a factor of 50. 1904 1905 Collective on MPI_Comm 1906 1907 Input Parameters: 1908 + comm - MPI communicator 1909 . bs - size of blockk 1910 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1911 This value should be the same as the local size used in creating the 1912 y vector for the matrix-vector product y = Ax. 1913 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1914 This value should be the same as the local size used in creating the 1915 x vector for the matrix-vector product y = Ax. 1916 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1917 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1918 . d_nz - number of block nonzeros per block row in diagonal portion of local 1919 submatrix (same for all local rows) 1920 . d_nzz - array containing the number of block nonzeros in the various block rows 1921 of the in diagonal portion of the local (possibly different for each block 1922 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1923 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1924 submatrix (same for all local rows). 1925 - o_nzz - array containing the number of nonzeros in the various block rows of the 1926 off-diagonal portion of the local submatrix (possibly different for 1927 each block row) or PETSC_NULL. 1928 1929 Output Parameter: 1930 . A - the matrix 1931 1932 Options Database Keys: 1933 . -mat_no_unroll - uses code that does not unroll the loops in the 1934 block calculations (much slower) 1935 . -mat_block_size - size of the blocks to use 1936 . -mat_mpi - use the parallel matrix data structures even on one processor 1937 (defaults to using SeqBAIJ format on one processor) 1938 1939 Notes: 1940 The user MUST specify either the local or global matrix dimensions 1941 (possibly both). 1942 1943 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1944 than it must be used on all processors that share the object for that argument. 1945 1946 Storage Information: 1947 For a square global matrix we define each processor's diagonal portion 1948 to be its local rows and the corresponding columns (a square submatrix); 1949 each processor's off-diagonal portion encompasses the remainder of the 1950 local matrix (a rectangular submatrix). 1951 1952 The user can specify preallocated storage for the diagonal part of 1953 the local submatrix with either d_nz or d_nnz (not both). Set 1954 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1955 memory allocation. Likewise, specify preallocated storage for the 1956 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1957 1958 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1959 the figure below we depict these three local rows and all columns (0-11). 1960 1961 .vb 1962 0 1 2 3 4 5 6 7 8 9 10 11 1963 ------------------- 1964 row 3 | o o o d d d o o o o o o 1965 row 4 | o o o d d d o o o o o o 1966 row 5 | o o o d d d o o o o o o 1967 ------------------- 1968 .ve 1969 1970 Thus, any entries in the d locations are stored in the d (diagonal) 1971 submatrix, and any entries in the o locations are stored in the 1972 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1973 stored simply in the MATSEQBAIJ format for compressed row storage. 1974 1975 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1976 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1977 In general, for PDE problems in which most nonzeros are near the diagonal, 1978 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1979 or you will get TERRIBLE performance; see the users' manual chapter on 1980 matrices. 1981 1982 .keywords: matrix, block, aij, compressed row, sparse, parallel 1983 1984 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1985 @*/ 1986 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1987 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1988 { 1989 Mat B; 1990 Mat_MPIBAIJ *b; 1991 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1992 int flag1 = 0,flag2 = 0; 1993 1994 PetscFunctionBegin; 1995 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1996 1997 MPI_Comm_size(comm,&size); 1998 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 1999 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 2000 if (!flag1 && !flag2 && size == 1) { 2001 if (M == PETSC_DECIDE) M = m; 2002 if (N == PETSC_DECIDE) N = n; 2003 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 *A = 0; 2008 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2009 PLogObjectCreate(B); 2010 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 2011 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 2012 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 2013 2014 B->ops->destroy = MatDestroy_MPIBAIJ; 2015 B->ops->view = MatView_MPIBAIJ; 2016 B->mapping = 0; 2017 B->factor = 0; 2018 B->assembled = PETSC_FALSE; 2019 2020 B->insertmode = NOT_SET_VALUES; 2021 MPI_Comm_rank(comm,&b->rank); 2022 MPI_Comm_size(comm,&b->size); 2023 2024 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2025 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2026 } 2027 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2028 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2029 } 2030 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2031 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2032 } 2033 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2034 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2035 2036 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2037 work[0] = m; work[1] = n; 2038 mbs = m/bs; nbs = n/bs; 2039 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2040 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2041 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2042 } 2043 if (m == PETSC_DECIDE) { 2044 Mbs = M/bs; 2045 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2046 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2047 m = mbs*bs; 2048 } 2049 if (n == PETSC_DECIDE) { 2050 Nbs = N/bs; 2051 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2052 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2053 n = nbs*bs; 2054 } 2055 if (mbs*bs != m || nbs*bs != n) { 2056 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2057 } 2058 2059 b->m = m; B->m = m; 2060 b->n = n; B->n = n; 2061 b->N = N; B->N = N; 2062 b->M = M; B->M = M; 2063 b->bs = bs; 2064 b->bs2 = bs*bs; 2065 b->mbs = mbs; 2066 b->nbs = nbs; 2067 b->Mbs = Mbs; 2068 b->Nbs = Nbs; 2069 2070 /* the information in the maps duplicates the information computed below, eventually 2071 we should remove the duplicate information that is not contained in the maps */ 2072 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2073 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2074 2075 /* build local table of row and column ownerships */ 2076 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2077 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2078 b->cowners = b->rowners + b->size + 2; 2079 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2080 b->rowners[0] = 0; 2081 for ( i=2; i<=b->size; i++ ) { 2082 b->rowners[i] += b->rowners[i-1]; 2083 } 2084 b->rstart = b->rowners[b->rank]; 2085 b->rend = b->rowners[b->rank+1]; 2086 b->rstart_bs = b->rstart * bs; 2087 b->rend_bs = b->rend * bs; 2088 2089 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2090 b->cowners[0] = 0; 2091 for ( i=2; i<=b->size; i++ ) { 2092 b->cowners[i] += b->cowners[i-1]; 2093 } 2094 b->cstart = b->cowners[b->rank]; 2095 b->cend = b->cowners[b->rank+1]; 2096 b->cstart_bs = b->cstart * bs; 2097 b->cend_bs = b->cend * bs; 2098 2099 2100 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2101 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2102 PLogObjectParent(B,b->A); 2103 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2104 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2105 PLogObjectParent(B,b->B); 2106 2107 /* build cache for off array entries formed */ 2108 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 2109 b->donotstash = 0; 2110 b->colmap = 0; 2111 b->garray = 0; 2112 b->roworiented = 1; 2113 2114 /* stuff used in block assembly */ 2115 b->barray = 0; 2116 2117 /* stuff used for matrix vector multiply */ 2118 b->lvec = 0; 2119 b->Mvctx = 0; 2120 2121 /* stuff for MatGetRow() */ 2122 b->rowindices = 0; 2123 b->rowvalues = 0; 2124 b->getrowactive = PETSC_FALSE; 2125 2126 /* hash table stuff */ 2127 b->ht = 0; 2128 b->hd = 0; 2129 b->ht_size = 0; 2130 b->ht_flag = 0; 2131 b->ht_fact = 0; 2132 b->ht_total_ct = 0; 2133 b->ht_insert_ct = 0; 2134 2135 *A = B; 2136 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2137 if (flg) { 2138 double fact = 1.39; 2139 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2140 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2141 if (fact <= 1.0) fact = 1.39; 2142 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2143 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2144 } 2145 ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C", 2146 "PCSetUp_BJacobi_BAIJ", 2147 (void*)PCSetUp_BJacobi_BAIJ);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNC__ 2152 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2153 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2154 { 2155 Mat mat; 2156 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2157 int ierr, len=0, flg; 2158 2159 PetscFunctionBegin; 2160 *newmat = 0; 2161 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2162 PLogObjectCreate(mat); 2163 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2164 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2165 mat->ops->destroy = MatDestroy_MPIBAIJ; 2166 mat->ops->view = MatView_MPIBAIJ; 2167 mat->factor = matin->factor; 2168 mat->assembled = PETSC_TRUE; 2169 2170 a->m = mat->m = oldmat->m; 2171 a->n = mat->n = oldmat->n; 2172 a->M = mat->M = oldmat->M; 2173 a->N = mat->N = oldmat->N; 2174 2175 a->bs = oldmat->bs; 2176 a->bs2 = oldmat->bs2; 2177 a->mbs = oldmat->mbs; 2178 a->nbs = oldmat->nbs; 2179 a->Mbs = oldmat->Mbs; 2180 a->Nbs = oldmat->Nbs; 2181 2182 a->rstart = oldmat->rstart; 2183 a->rend = oldmat->rend; 2184 a->cstart = oldmat->cstart; 2185 a->cend = oldmat->cend; 2186 a->size = oldmat->size; 2187 a->rank = oldmat->rank; 2188 mat->insertmode = NOT_SET_VALUES; 2189 a->rowvalues = 0; 2190 a->getrowactive = PETSC_FALSE; 2191 a->barray = 0; 2192 2193 /* hash table stuff */ 2194 a->ht = 0; 2195 a->hd = 0; 2196 a->ht_size = 0; 2197 a->ht_flag = oldmat->ht_flag; 2198 a->ht_fact = oldmat->ht_fact; 2199 a->ht_total_ct = 0; 2200 a->ht_insert_ct = 0; 2201 2202 2203 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2204 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2205 a->cowners = a->rowners + a->size + 2; 2206 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2207 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2208 if (oldmat->colmap) { 2209 #if defined (USE_CTABLE) 2210 ierr = TableCreateCopy( &a->colmap,oldmat->colmap ); CHKERRQ(ierr); 2211 #else 2212 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2213 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2214 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2215 #endif 2216 } else a->colmap = 0; 2217 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2218 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2219 PLogObjectMemory(mat,len*sizeof(int)); 2220 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2221 } else a->garray = 0; 2222 2223 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2224 PLogObjectParent(mat,a->lvec); 2225 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2226 2227 PLogObjectParent(mat,a->Mvctx); 2228 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2229 PLogObjectParent(mat,a->A); 2230 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2231 PLogObjectParent(mat,a->B); 2232 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2233 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2234 if (flg) { 2235 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2236 } 2237 *newmat = mat; 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #include "sys.h" 2242 2243 #undef __FUNC__ 2244 #define __FUNC__ "MatLoad_MPIBAIJ" 2245 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2246 { 2247 Mat A; 2248 int i, nz, ierr, j,rstart, rend, fd; 2249 Scalar *vals,*buf; 2250 MPI_Comm comm = ((PetscObject)viewer)->comm; 2251 MPI_Status status; 2252 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2253 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2254 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2255 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2256 int dcount,kmax,k,nzcount,tmp; 2257 2258 PetscFunctionBegin; 2259 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2260 2261 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2262 if (!rank) { 2263 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2264 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2265 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2266 if (header[3] < 0) { 2267 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2268 } 2269 } 2270 2271 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2272 M = header[1]; N = header[2]; 2273 2274 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2275 2276 /* 2277 This code adds extra rows to make sure the number of rows is 2278 divisible by the blocksize 2279 */ 2280 Mbs = M/bs; 2281 extra_rows = bs - M + bs*(Mbs); 2282 if (extra_rows == bs) extra_rows = 0; 2283 else Mbs++; 2284 if (extra_rows &&!rank) { 2285 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2286 } 2287 2288 /* determine ownership of all rows */ 2289 mbs = Mbs/size + ((Mbs % size) > rank); 2290 m = mbs * bs; 2291 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2292 browners = rowners + size + 1; 2293 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2294 rowners[0] = 0; 2295 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2296 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2297 rstart = rowners[rank]; 2298 rend = rowners[rank+1]; 2299 2300 /* distribute row lengths to all processors */ 2301 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2302 if (!rank) { 2303 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2304 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2305 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2306 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2307 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2308 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2309 PetscFree(sndcounts); 2310 } else { 2311 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2312 } 2313 2314 if (!rank) { 2315 /* calculate the number of nonzeros on each processor */ 2316 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2317 PetscMemzero(procsnz,size*sizeof(int)); 2318 for ( i=0; i<size; i++ ) { 2319 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2320 procsnz[i] += rowlengths[j]; 2321 } 2322 } 2323 PetscFree(rowlengths); 2324 2325 /* determine max buffer needed and allocate it */ 2326 maxnz = 0; 2327 for ( i=0; i<size; i++ ) { 2328 maxnz = PetscMax(maxnz,procsnz[i]); 2329 } 2330 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2331 2332 /* read in my part of the matrix column indices */ 2333 nz = procsnz[0]; 2334 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2335 mycols = ibuf; 2336 if (size == 1) nz -= extra_rows; 2337 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2338 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2339 2340 /* read in every ones (except the last) and ship off */ 2341 for ( i=1; i<size-1; i++ ) { 2342 nz = procsnz[i]; 2343 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2344 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2345 } 2346 /* read in the stuff for the last proc */ 2347 if ( size != 1 ) { 2348 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2349 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2350 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2351 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2352 } 2353 PetscFree(cols); 2354 } else { 2355 /* determine buffer space needed for message */ 2356 nz = 0; 2357 for ( i=0; i<m; i++ ) { 2358 nz += locrowlens[i]; 2359 } 2360 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2361 mycols = ibuf; 2362 /* receive message of column indices*/ 2363 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2364 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2365 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2366 } 2367 2368 /* loop over local rows, determining number of off diagonal entries */ 2369 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2370 odlens = dlens + (rend-rstart); 2371 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2372 PetscMemzero(mask,3*Mbs*sizeof(int)); 2373 masked1 = mask + Mbs; 2374 masked2 = masked1 + Mbs; 2375 rowcount = 0; nzcount = 0; 2376 for ( i=0; i<mbs; i++ ) { 2377 dcount = 0; 2378 odcount = 0; 2379 for ( j=0; j<bs; j++ ) { 2380 kmax = locrowlens[rowcount]; 2381 for ( k=0; k<kmax; k++ ) { 2382 tmp = mycols[nzcount++]/bs; 2383 if (!mask[tmp]) { 2384 mask[tmp] = 1; 2385 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2386 else masked1[dcount++] = tmp; 2387 } 2388 } 2389 rowcount++; 2390 } 2391 2392 dlens[i] = dcount; 2393 odlens[i] = odcount; 2394 2395 /* zero out the mask elements we set */ 2396 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2397 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2398 } 2399 2400 /* create our matrix */ 2401 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2402 CHKERRQ(ierr); 2403 A = *newmat; 2404 MatSetOption(A,MAT_COLUMNS_SORTED); 2405 2406 if (!rank) { 2407 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2408 /* read in my part of the matrix numerical values */ 2409 nz = procsnz[0]; 2410 vals = buf; 2411 mycols = ibuf; 2412 if (size == 1) nz -= extra_rows; 2413 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2414 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2415 2416 /* insert into matrix */ 2417 jj = rstart*bs; 2418 for ( i=0; i<m; i++ ) { 2419 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 mycols += locrowlens[i]; 2421 vals += locrowlens[i]; 2422 jj++; 2423 } 2424 /* read in other processors (except the last one) and ship out */ 2425 for ( i=1; i<size-1; i++ ) { 2426 nz = procsnz[i]; 2427 vals = buf; 2428 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2429 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2430 } 2431 /* the last proc */ 2432 if ( size != 1 ){ 2433 nz = procsnz[i] - extra_rows; 2434 vals = buf; 2435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2436 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2437 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2438 } 2439 PetscFree(procsnz); 2440 } else { 2441 /* receive numeric values */ 2442 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2443 2444 /* receive message of values*/ 2445 vals = buf; 2446 mycols = ibuf; 2447 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2448 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2449 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2450 2451 /* insert into matrix */ 2452 jj = rstart*bs; 2453 for ( i=0; i<m; i++ ) { 2454 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2455 mycols += locrowlens[i]; 2456 vals += locrowlens[i]; 2457 jj++; 2458 } 2459 } 2460 PetscFree(locrowlens); 2461 PetscFree(buf); 2462 PetscFree(ibuf); 2463 PetscFree(rowners); 2464 PetscFree(dlens); 2465 PetscFree(mask); 2466 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2467 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 2472 2473 #undef __FUNC__ 2474 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2475 /*@ 2476 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2477 2478 Input Parameters: 2479 . mat - the matrix 2480 . fact - factor 2481 2482 Collective on Mat 2483 2484 Notes: 2485 This can also be set by the command line option: -mat_use_hash_table fact 2486 2487 .keywords: matrix, hashtable, factor, HT 2488 2489 .seealso: MatSetOption() 2490 @*/ 2491 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2492 { 2493 Mat_MPIBAIJ *baij; 2494 2495 PetscFunctionBegin; 2496 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2497 if (mat->type != MATMPIBAIJ) { 2498 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2499 } 2500 baij = (Mat_MPIBAIJ*) mat->data; 2501 baij->ht_fact = fact; 2502 PetscFunctionReturn(0); 2503 } 2504