1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.141 1999/01/08 18:15:39 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. This is done in a non scable way since the 18 length of colmap equals the global matrix length. 19 */ 20 #undef __FUNC__ 21 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 22 static int CreateColmap_MPIBAIJ_Private(Mat mat) 23 { 24 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 25 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 26 int nbs = B->nbs,i,bs=B->bs,ierr; 27 28 PetscFunctionBegin; 29 #if defined (USE_CTABLE) 30 ierr = TableCreate( &baij->colmap, baij->nbs/5 ); CHKERRQ(ierr); 31 for ( i=0; i<nbs; i++ ){ 32 ierr = TableAdd( baij->colmap, baij->garray[i] + 1, i*bs+1 );CHKERRQ(ierr); 33 } 34 #else 35 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 36 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 37 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 38 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 10 44 45 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 brow = row/bs; \ 49 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 50 rmax = aimax[brow]; nrow = ailen[brow]; \ 51 bcol = col/bs; \ 52 ridx = row % bs; cidx = col % bs; \ 53 low = 0; high = nrow; \ 54 while (high-low > 3) { \ 55 t = (low+high)/2; \ 56 if (rp[t] > bcol) high = t; \ 57 else low = t; \ 58 } \ 59 for ( _i=low; _i<high; _i++ ) { \ 60 if (rp[_i] > bcol) break; \ 61 if (rp[_i] == bcol) { \ 62 bap = ap + bs2*_i + bs*cidx + ridx; \ 63 if (addv == ADD_VALUES) *bap += value; \ 64 else *bap = value; \ 65 goto a_noinsert; \ 66 } \ 67 } \ 68 if (a->nonew == 1) goto a_noinsert; \ 69 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 70 if (nrow >= rmax) { \ 71 /* there is no extra room in row, therefore enlarge */ \ 72 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 73 Scalar *new_a; \ 74 \ 75 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 76 \ 77 /* malloc new storage space */ \ 78 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 79 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 80 new_j = (int *) (new_a + bs2*new_nz); \ 81 new_i = new_j + new_nz; \ 82 \ 83 /* copy over old data into new slots */ \ 84 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 85 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 86 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 87 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 88 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 89 len*sizeof(int)); \ 90 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 91 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 92 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 93 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 94 /* free up old matrix storage */ \ 95 PetscFree(a->a); \ 96 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = 1; \ 99 \ 100 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 101 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 102 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 103 a->maxnz += bs2*CHUNKSIZE; \ 104 a->reallocs++; \ 105 a->nz++; \ 106 } \ 107 N = nrow++ - 1; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 112 } \ 113 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 114 rp[_i] = bcol; \ 115 ap[bs2*_i + bs*cidx + ridx] = value; \ 116 a_noinsert:; \ 117 ailen[brow] = nrow; \ 118 } 119 120 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 121 { \ 122 \ 123 brow = row/bs; \ 124 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 125 rmax = bimax[brow]; nrow = bilen[brow]; \ 126 bcol = col/bs; \ 127 ridx = row % bs; cidx = col % bs; \ 128 low = 0; high = nrow; \ 129 while (high-low > 3) { \ 130 t = (low+high)/2; \ 131 if (rp[t] > bcol) high = t; \ 132 else low = t; \ 133 } \ 134 for ( _i=low; _i<high; _i++ ) { \ 135 if (rp[_i] > bcol) break; \ 136 if (rp[_i] == bcol) { \ 137 bap = ap + bs2*_i + bs*cidx + ridx; \ 138 if (addv == ADD_VALUES) *bap += value; \ 139 else *bap = value; \ 140 goto b_noinsert; \ 141 } \ 142 } \ 143 if (b->nonew == 1) goto b_noinsert; \ 144 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 145 if (nrow >= rmax) { \ 146 /* there is no extra room in row, therefore enlarge */ \ 147 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 148 Scalar *new_a; \ 149 \ 150 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 151 \ 152 /* malloc new storage space */ \ 153 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 154 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 155 new_j = (int *) (new_a + bs2*new_nz); \ 156 new_i = new_j + new_nz; \ 157 \ 158 /* copy over old data into new slots */ \ 159 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 160 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 161 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 162 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 163 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 164 len*sizeof(int)); \ 165 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 166 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 167 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 168 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 169 /* free up old matrix storage */ \ 170 PetscFree(b->a); \ 171 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = 1; \ 174 \ 175 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 178 b->maxnz += bs2*CHUNKSIZE; \ 179 b->reallocs++; \ 180 b->nz++; \ 181 } \ 182 N = nrow++ - 1; \ 183 /* shift up all the later entries in this row */ \ 184 for ( ii=N; ii>=_i; ii-- ) { \ 185 rp[ii+1] = rp[ii]; \ 186 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 187 } \ 188 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 189 rp[_i] = bcol; \ 190 ap[bs2*_i + bs*cidx + ridx] = value; \ 191 b_noinsert:; \ 192 bilen[brow] = nrow; \ 193 } 194 195 #undef __FUNC__ 196 #define __FUNC__ "MatSetValues_MPIBAIJ" 197 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 198 { 199 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 200 Scalar value; 201 int ierr,i,j,row,col; 202 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 203 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 204 int cend_orig=baij->cend_bs,bs=baij->bs; 205 206 /* Some Variables required in the macro */ 207 Mat A = baij->A; 208 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 209 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 210 Scalar *aa=a->a; 211 212 Mat B = baij->B; 213 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 214 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 215 Scalar *ba=b->a; 216 217 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 218 int low,high,t,ridx,cidx,bs2=a->bs2; 219 Scalar *ap,*bap; 220 221 PetscFunctionBegin; 222 for ( i=0; i<m; i++ ) { 223 #if defined(USE_PETSC_BOPT_g) 224 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 225 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 226 #endif 227 if (im[i] >= rstart_orig && im[i] < rend_orig) { 228 row = im[i] - rstart_orig; 229 for ( j=0; j<n; j++ ) { 230 if (in[j] >= cstart_orig && in[j] < cend_orig){ 231 col = in[j] - cstart_orig; 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 234 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 235 } 236 #if defined(USE_PETSC_BOPT_g) 237 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 238 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 239 #endif 240 else { 241 if (mat->was_assembled) { 242 if (!baij->colmap) { 243 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 244 } 245 #if defined (USE_CTABLE) 246 col = TableFind( baij->colmap, in[j]/bs + 1 ) - 1 + in[j]%bs; 247 #else 248 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 249 #endif 250 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 251 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 252 col = in[j]; 253 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 254 B = baij->B; 255 b = (Mat_SeqBAIJ *) (B)->data; 256 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 257 ba=b->a; 258 } 259 } else col = in[j]; 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (roworiented && !baij->donotstash) { 267 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 268 } else { 269 if (!baij->donotstash) { 270 row = im[i]; 271 for ( j=0; j<n; j++ ) { 272 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 } 278 PetscFunctionReturn(0); 279 } 280 281 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 282 #undef __FUNC__ 283 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 284 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 285 { 286 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 287 Scalar *value,*barray=baij->barray; 288 int ierr,i,j,ii,jj,row,col,k,l; 289 int roworiented = baij->roworiented,rstart=baij->rstart ; 290 int rend=baij->rend,cstart=baij->cstart,stepval; 291 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 292 293 if(!barray) { 294 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 295 } 296 297 if (roworiented) { 298 stepval = (n-1)*bs; 299 } else { 300 stepval = (m-1)*bs; 301 } 302 for ( i=0; i<m; i++ ) { 303 #if defined(USE_PETSC_BOPT_g) 304 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 305 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 306 #endif 307 if (im[i] >= rstart && im[i] < rend) { 308 row = im[i] - rstart; 309 for ( j=0; j<n; j++ ) { 310 /* If NumCol = 1 then a copy is not required */ 311 if ((roworiented) && (n == 1)) { 312 barray = v + i*bs2; 313 } else if((!roworiented) && (m == 1)) { 314 barray = v + j*bs2; 315 } else { /* Here a copy is required */ 316 if (roworiented) { 317 value = v + i*(stepval+bs)*bs + j*bs; 318 } else { 319 value = v + j*(stepval+bs)*bs + i*bs; 320 } 321 for ( ii=0; ii<bs; ii++,value+=stepval ) { 322 for (jj=0; jj<bs; jj++ ) { 323 *barray++ = *value++; 324 } 325 } 326 barray -=bs2; 327 } 328 329 if (in[j] >= cstart && in[j] < cend){ 330 col = in[j] - cstart; 331 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 332 } 333 #if defined(USE_PETSC_BOPT_g) 334 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 335 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 336 #endif 337 else { 338 if (mat->was_assembled) { 339 if (!baij->colmap) { 340 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 341 } 342 343 #if defined(USE_PETSC_BOPT_g) 344 #if defined (USE_CTABLE) 345 if( (TableFind( baij->colmap, in[j] + 1 ) - 1) % bs) 346 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 349 #endif 350 #endif 351 #if defined (USE_CTABLE) 352 col = (TableFind( baij->colmap, in[j] + 1 ) - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 358 col = in[j]; 359 } 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (!baij->donotstash) { 367 if (roworiented ) { 368 row = im[i]*bs; 369 value = v + i*(stepval+bs)*bs; 370 for ( j=0; j<bs; j++,row++ ) { 371 for ( k=0; k<n; k++ ) { 372 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 373 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 374 } 375 } 376 } 377 } else { 378 for ( j=0; j<n; j++ ) { 379 value = v + j*(stepval+bs)*bs + i*bs; 380 col = in[j]*bs; 381 for ( k=0; k<bs; k++,col++,value+=stepval) { 382 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 383 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 384 } 385 } 386 } 387 } 388 } 389 } 390 } 391 PetscFunctionReturn(0); 392 } 393 #define HASH_KEY 0.6180339887 394 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 395 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 396 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 397 #undef __FUNC__ 398 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 399 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 400 { 401 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 402 int ierr,i,j,row,col; 403 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 404 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 405 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 406 double tmp; 407 Scalar ** HD = baij->hd,value; 408 #if defined(USE_PETSC_BOPT_g) 409 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 410 #endif 411 412 PetscFunctionBegin; 413 414 for ( i=0; i<m; i++ ) { 415 #if defined(USE_PETSC_BOPT_g) 416 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 417 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 418 #endif 419 row = im[i]; 420 if (row >= rstart_orig && row < rend_orig) { 421 for ( j=0; j<n; j++ ) { 422 col = in[j]; 423 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 424 /* Look up into the Hash Table */ 425 key = (row/bs)*Nbs+(col/bs)+1; 426 h1 = HASH(size,key,tmp); 427 428 429 idx = h1; 430 #if defined(USE_PETSC_BOPT_g) 431 insert_ct++; 432 total_ct++; 433 if (HT[idx] != key) { 434 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 435 if (idx == size) { 436 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 437 if (idx == h1) { 438 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 439 } 440 } 441 } 442 #else 443 if (HT[idx] != key) { 444 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 445 if (idx == size) { 446 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 447 if (idx == h1) { 448 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 449 } 450 } 451 } 452 #endif 453 /* A HASH table entry is found, so insert the values at the correct address */ 454 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 455 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 456 } 457 } else { 458 if (roworiented && !baij->donotstash) { 459 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 460 } else { 461 if (!baij->donotstash) { 462 row = im[i]; 463 for ( j=0; j<n; j++ ) { 464 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 465 } 466 } 467 } 468 } 469 } 470 #if defined(USE_PETSC_BOPT_g) 471 baij->ht_total_ct = total_ct; 472 baij->ht_insert_ct = insert_ct; 473 #endif 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNC__ 478 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 479 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 480 { 481 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 482 int ierr,i,j,ii,jj,row,col,k,l; 483 int roworiented = baij->roworiented,rstart=baij->rstart ; 484 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 485 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 486 double tmp; 487 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 488 #if defined(USE_PETSC_BOPT_g) 489 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 490 #endif 491 492 PetscFunctionBegin; 493 494 if (roworiented) { 495 stepval = (n-1)*bs; 496 } else { 497 stepval = (m-1)*bs; 498 } 499 for ( i=0; i<m; i++ ) { 500 #if defined(USE_PETSC_BOPT_g) 501 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 502 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 503 #endif 504 row = im[i]; 505 v_t = v + i*bs2; 506 if (row >= rstart && row < rend) { 507 for ( j=0; j<n; j++ ) { 508 col = in[j]; 509 510 /* Look up into the Hash Table */ 511 key = row*Nbs+col+1; 512 h1 = HASH(size,key,tmp); 513 514 idx = h1; 515 #if defined(USE_PETSC_BOPT_g) 516 total_ct++; 517 insert_ct++; 518 if (HT[idx] != key) { 519 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 520 if (idx == size) { 521 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 522 if (idx == h1) { 523 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 524 } 525 } 526 } 527 #else 528 if (HT[idx] != key) { 529 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 530 if (idx == size) { 531 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 532 if (idx == h1) { 533 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 534 } 535 } 536 } 537 #endif 538 baij_a = HD[idx]; 539 if (roworiented) { 540 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 541 /* value = v + (i*(stepval+bs)+j)*bs; */ 542 value = v_t; 543 v_t += bs; 544 if (addv == ADD_VALUES) { 545 for ( ii=0; ii<bs; ii++,value+=stepval) { 546 for ( jj=ii; jj<bs2; jj+=bs ) { 547 baij_a[jj] += *value++; 548 } 549 } 550 } else { 551 for ( ii=0; ii<bs; ii++,value+=stepval) { 552 for ( jj=ii; jj<bs2; jj+=bs ) { 553 baij_a[jj] = *value++; 554 } 555 } 556 } 557 } else { 558 value = v + j*(stepval+bs)*bs + i*bs; 559 if (addv == ADD_VALUES) { 560 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 561 for ( jj=0; jj<bs; jj++ ) { 562 baij_a[jj] += *value++; 563 } 564 } 565 } else { 566 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 567 for ( jj=0; jj<bs; jj++ ) { 568 baij_a[jj] = *value++; 569 } 570 } 571 } 572 } 573 } 574 } else { 575 if (!baij->donotstash) { 576 if (roworiented ) { 577 row = im[i]*bs; 578 value = v + i*(stepval+bs)*bs; 579 for ( j=0; j<bs; j++,row++ ) { 580 for ( k=0; k<n; k++ ) { 581 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 582 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 583 } 584 } 585 } 586 } else { 587 for ( j=0; j<n; j++ ) { 588 value = v + j*(stepval+bs)*bs + i*bs; 589 col = in[j]*bs; 590 for ( k=0; k<bs; k++,col++,value+=stepval) { 591 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 592 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 593 } 594 } 595 } 596 } 597 } 598 } 599 } 600 #if defined(USE_PETSC_BOPT_g) 601 baij->ht_total_ct = total_ct; 602 baij->ht_insert_ct = insert_ct; 603 #endif 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNC__ 608 #define __FUNC__ "MatGetValues_MPIBAIJ" 609 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 612 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 613 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 614 615 PetscFunctionBegin; 616 for ( i=0; i<m; i++ ) { 617 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 618 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 619 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 620 row = idxm[i] - bsrstart; 621 for ( j=0; j<n; j++ ) { 622 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 623 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 624 if (idxn[j] >= bscstart && idxn[j] < bscend){ 625 col = idxn[j] - bscstart; 626 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 627 } else { 628 if (!baij->colmap) { 629 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 630 } 631 #if defined (USE_CTABLE) 632 data = TableFind( baij->colmap, idxn[j]/bs + 1 ) - 1; 633 #else 634 data = baij->colmap[idxn[j]/bs]-1; 635 #endif 636 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 637 else { 638 col = data + idxn[j]%bs; 639 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 640 } 641 } 642 } 643 } else { 644 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 645 } 646 } 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNC__ 651 #define __FUNC__ "MatNorm_MPIBAIJ" 652 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 655 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 656 int ierr, i,bs2=baij->bs2; 657 double sum = 0.0; 658 Scalar *v; 659 660 PetscFunctionBegin; 661 if (baij->size == 1) { 662 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 663 } else { 664 if (type == NORM_FROBENIUS) { 665 v = amat->a; 666 for (i=0; i<amat->nz*bs2; i++ ) { 667 #if defined(USE_PETSC_COMPLEX) 668 sum += PetscReal(PetscConj(*v)*(*v)); v++; 669 #else 670 sum += (*v)*(*v); v++; 671 #endif 672 } 673 v = bmat->a; 674 for (i=0; i<bmat->nz*bs2; i++ ) { 675 #if defined(USE_PETSC_COMPLEX) 676 sum += PetscReal(PetscConj(*v)*(*v)); v++; 677 #else 678 sum += (*v)*(*v); v++; 679 #endif 680 } 681 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 682 *norm = sqrt(*norm); 683 } else { 684 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 685 } 686 } 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNC__ 691 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 692 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 695 MPI_Comm comm = mat->comm; 696 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 697 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 698 MPI_Request *send_waits,*recv_waits; 699 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 700 InsertMode addv; 701 Scalar *rvalues,*svalues; 702 703 PetscFunctionBegin; 704 if (baij->donotstash) { 705 baij->svalues = 0; baij->rvalues = 0; 706 baij->nsends = 0; baij->nrecvs = 0; 707 baij->send_waits = 0; baij->recv_waits = 0; 708 baij->rmax = 0; 709 PetscFunctionReturn(0); 710 } 711 712 /* make sure all processors are either in INSERTMODE or ADDMODE */ 713 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 714 if (addv == (ADD_VALUES|INSERT_VALUES)) { 715 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 716 } 717 mat->insertmode = addv; /* in case this processor had no cache */ 718 719 /* first count number of contributors to each processor */ 720 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 721 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 722 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 723 for ( i=0; i<baij->stash.n; i++ ) { 724 idx = baij->stash.idx[i]; 725 for ( j=0; j<size; j++ ) { 726 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 727 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 728 } 729 } 730 } 731 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 732 733 /* inform other processors of number of messages and max length*/ 734 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 735 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 736 nreceives = work[rank]; 737 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 738 nmax = work[rank]; 739 PetscFree(work); 740 741 /* post receives: 742 1) each message will consist of ordered pairs 743 (global index,value) we store the global index as a double 744 to simplify the message passing. 745 2) since we don't know how long each individual message is we 746 allocate the largest needed buffer for each receive. Potentially 747 this is a lot of wasted space. 748 749 750 This could be done better. 751 */ 752 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 753 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 754 for ( i=0; i<nreceives; i++ ) { 755 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 756 } 757 758 /* do sends: 759 1) starts[i] gives the starting index in svalues for stuff going to 760 the ith processor 761 */ 762 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 763 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 764 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 765 starts[0] = 0; 766 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 767 for ( i=0; i<baij->stash.n; i++ ) { 768 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 769 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 770 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 771 } 772 PetscFree(owner); 773 starts[0] = 0; 774 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 775 count = 0; 776 for ( i=0; i<size; i++ ) { 777 if (procs[i]) { 778 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 779 } 780 } 781 PetscFree(starts); PetscFree(nprocs); 782 783 /* Free cache space */ 784 PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 785 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 786 787 baij->svalues = svalues; baij->rvalues = rvalues; 788 baij->nsends = nsends; baij->nrecvs = nreceives; 789 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 790 baij->rmax = nmax; 791 792 PetscFunctionReturn(0); 793 } 794 795 /* 796 Creates the hash table, and sets the table 797 This table is created only once. 798 If new entried need to be added to the matrix 799 then the hash table has to be destroyed and 800 recreated. 801 */ 802 #undef __FUNC__ 803 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 804 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 805 { 806 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 807 Mat A = baij->A, B=baij->B; 808 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 809 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 810 int size,bs2=baij->bs2,rstart=baij->rstart; 811 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 812 int *HT,key; 813 Scalar **HD; 814 double tmp; 815 #if defined(USE_PETSC_BOPT_g) 816 int ct=0,max=0; 817 #endif 818 819 PetscFunctionBegin; 820 baij->ht_size=(int)(factor*nz); 821 size = baij->ht_size; 822 823 if (baij->ht) { 824 PetscFunctionReturn(0); 825 } 826 827 /* Allocate Memory for Hash Table */ 828 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 829 baij->ht = (int*)(baij->hd + size); 830 HD = baij->hd; 831 HT = baij->ht; 832 833 834 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 835 836 837 /* Loop Over A */ 838 for ( i=0; i<a->mbs; i++ ) { 839 for ( j=ai[i]; j<ai[i+1]; j++ ) { 840 row = i+rstart; 841 col = aj[j]+cstart; 842 843 key = row*Nbs + col + 1; 844 h1 = HASH(size,key,tmp); 845 for ( k=0; k<size; k++ ){ 846 if (HT[(h1+k)%size] == 0.0) { 847 HT[(h1+k)%size] = key; 848 HD[(h1+k)%size] = a->a + j*bs2; 849 break; 850 #if defined(USE_PETSC_BOPT_g) 851 } else { 852 ct++; 853 #endif 854 } 855 } 856 #if defined(USE_PETSC_BOPT_g) 857 if (k> max) max = k; 858 #endif 859 } 860 } 861 /* Loop Over B */ 862 for ( i=0; i<b->mbs; i++ ) { 863 for ( j=bi[i]; j<bi[i+1]; j++ ) { 864 row = i+rstart; 865 col = garray[bj[j]]; 866 key = row*Nbs + col + 1; 867 h1 = HASH(size,key,tmp); 868 for ( k=0; k<size; k++ ){ 869 if (HT[(h1+k)%size] == 0.0) { 870 HT[(h1+k)%size] = key; 871 HD[(h1+k)%size] = b->a + j*bs2; 872 break; 873 #if defined(USE_PETSC_BOPT_g) 874 } else { 875 ct++; 876 #endif 877 } 878 } 879 #if defined(USE_PETSC_BOPT_g) 880 if (k> max) max = k; 881 #endif 882 } 883 } 884 885 /* Print Summary */ 886 #if defined(USE_PETSC_BOPT_g) 887 for ( i=0,j=0; i<size; i++) 888 if (HT[i]) {j++;} 889 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 890 (j== 0)? 0.0:((double)(ct+j))/j,max); 891 #endif 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNC__ 896 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 897 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 898 { 899 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 900 MPI_Status *send_status,recv_status; 901 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 902 int bs=baij->bs,row,col,other_disassembled; 903 Scalar *values,val; 904 InsertMode addv = mat->insertmode; 905 906 PetscFunctionBegin; 907 /* wait on receives */ 908 while (count) { 909 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 910 /* unpack receives into our local space */ 911 values = baij->rvalues + 3*imdex*baij->rmax; 912 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 913 n = n/3; 914 for ( i=0; i<n; i++ ) { 915 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 916 col = (int) PetscReal(values[3*i+1]); 917 val = values[3*i+2]; 918 if (col >= baij->cstart*bs && col < baij->cend*bs) { 919 col -= baij->cstart*bs; 920 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 921 } else { 922 if (mat->was_assembled) { 923 if (!baij->colmap) { 924 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 925 } 926 #if defined (USE_CTABLE) 927 col = TableFind( baij->colmap, col/bs + 1 ) - 1 + col%bs; 928 #else 929 col = (baij->colmap[col/bs]) - 1 + col%bs; 930 #endif 931 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 932 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 933 col = (int) PetscReal(values[3*i+1]); 934 } 935 } 936 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 937 } 938 } 939 count--; 940 } 941 if (baij->recv_waits) PetscFree(baij->recv_waits); 942 if (baij->rvalues) PetscFree(baij->rvalues); 943 944 /* wait on sends */ 945 if (baij->nsends) { 946 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 947 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 948 PetscFree(send_status); 949 } 950 if (baij->send_waits) PetscFree(baij->send_waits); 951 if (baij->svalues) PetscFree(baij->svalues); 952 953 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 954 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 955 956 /* determine if any processor has disassembled, if so we must 957 also disassemble ourselfs, in order that we may reassemble. */ 958 /* 959 if nonzero structure of submatrix B cannot change then we know that 960 no processor disassembled thus we can skip this stuff 961 */ 962 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 963 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 964 if (mat->was_assembled && !other_disassembled) { 965 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 966 } 967 } 968 969 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 970 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 971 } 972 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 973 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 974 975 #if defined(USE_PETSC_BOPT_g) 976 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 977 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 978 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 979 baij->ht_total_ct = 0; 980 baij->ht_insert_ct = 0; 981 } 982 #endif 983 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 984 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 985 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 986 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 987 } 988 989 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNC__ 994 #define __FUNC__ "MatView_MPIBAIJ_Binary" 995 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 996 { 997 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 998 int ierr; 999 1000 PetscFunctionBegin; 1001 if (baij->size == 1) { 1002 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1003 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNC__ 1008 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 1009 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 1010 { 1011 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1012 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 1013 FILE *fd; 1014 ViewerType vtype; 1015 1016 PetscFunctionBegin; 1017 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1018 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 1019 ierr = ViewerGetFormat(viewer,&format); 1020 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1021 MatInfo info; 1022 MPI_Comm_rank(mat->comm,&rank); 1023 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1024 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 1025 PetscSequentialPhaseBegin(mat->comm,1); 1026 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1027 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1028 baij->bs,(int)info.memory); 1029 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 1030 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1031 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 1032 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1033 fflush(fd); 1034 PetscSequentialPhaseEnd(mat->comm,1); 1035 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1038 PetscPrintf(mat->comm," block size is %d\n",bs); 1039 PetscFunctionReturn(0); 1040 } 1041 } 1042 1043 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 1044 Draw draw; 1045 PetscTruth isnull; 1046 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 1047 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1048 } 1049 1050 if (size == 1) { 1051 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1052 } else { 1053 /* assemble the entire matrix onto first processor. */ 1054 Mat A; 1055 Mat_SeqBAIJ *Aloc; 1056 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 1057 int mbs = baij->mbs; 1058 Scalar *a; 1059 1060 if (!rank) { 1061 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1062 } else { 1063 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1064 } 1065 PLogObjectParent(mat,A); 1066 1067 /* copy over the A part */ 1068 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1069 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1070 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1071 1072 for ( i=0; i<mbs; i++ ) { 1073 rvals[0] = bs*(baij->rstart + i); 1074 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1075 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1076 col = (baij->cstart+aj[j])*bs; 1077 for (k=0; k<bs; k++ ) { 1078 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1079 col++; a += bs; 1080 } 1081 } 1082 } 1083 /* copy over the B part */ 1084 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1085 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1086 for ( i=0; i<mbs; i++ ) { 1087 rvals[0] = bs*(baij->rstart + i); 1088 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1089 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1090 col = baij->garray[aj[j]]*bs; 1091 for (k=0; k<bs; k++ ) { 1092 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1093 col++; a += bs; 1094 } 1095 } 1096 } 1097 PetscFree(rvals); 1098 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1099 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1100 /* 1101 Everyone has to call to draw the matrix since the graphics waits are 1102 synchronized across all processors that share the Draw object 1103 */ 1104 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1105 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1106 } 1107 ierr = MatDestroy(A); CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatView_MPIBAIJ" 1116 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1117 { 1118 int ierr; 1119 ViewerType vtype; 1120 1121 PetscFunctionBegin; 1122 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1123 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1124 PetscTypeCompare(vtype,MATLAB_VIEWER)) { 1125 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1126 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1127 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1128 } else { 1129 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1130 } 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNC__ 1135 #define __FUNC__ "MatDestroy_MPIBAIJ" 1136 int MatDestroy_MPIBAIJ(Mat mat) 1137 { 1138 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 if (--mat->refct > 0) PetscFunctionReturn(0); 1143 1144 if (mat->mapping) { 1145 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1146 } 1147 if (mat->bmapping) { 1148 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1149 } 1150 if (mat->rmap) { 1151 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1152 } 1153 if (mat->cmap) { 1154 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1155 } 1156 #if defined(USE_PETSC_LOG) 1157 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1158 #endif 1159 1160 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1161 PetscFree(baij->rowners); 1162 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1163 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1164 #if defined (USE_CTABLE) 1165 if (baij->colmap) TableDelete(baij->colmap); 1166 #else 1167 if (baij->colmap) PetscFree(baij->colmap); 1168 #endif 1169 if (baij->garray) PetscFree(baij->garray); 1170 if (baij->lvec) VecDestroy(baij->lvec); 1171 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1172 if (baij->rowvalues) PetscFree(baij->rowvalues); 1173 if (baij->barray) PetscFree(baij->barray); 1174 if (baij->hd) PetscFree(baij->hd); 1175 PetscFree(baij); 1176 PLogObjectDestroy(mat); 1177 PetscHeaderDestroy(mat); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNC__ 1182 #define __FUNC__ "MatMult_MPIBAIJ" 1183 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1184 { 1185 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1186 int ierr, nt; 1187 1188 PetscFunctionBegin; 1189 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1190 if (nt != a->n) { 1191 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1192 } 1193 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1194 if (nt != a->m) { 1195 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1196 } 1197 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1198 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1199 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1200 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1201 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1207 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1208 { 1209 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1210 int ierr; 1211 1212 PetscFunctionBegin; 1213 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1214 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1215 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1216 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNC__ 1221 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1222 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1225 int ierr; 1226 1227 PetscFunctionBegin; 1228 /* do nondiagonal part */ 1229 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1230 /* send it on its way */ 1231 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1232 /* do local part */ 1233 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1234 /* receive remote parts: note this assumes the values are not actually */ 1235 /* inserted in yy until the next line, which is true for my implementation*/ 1236 /* but is not perhaps always true. */ 1237 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNC__ 1242 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1243 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1244 { 1245 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1246 int ierr; 1247 1248 PetscFunctionBegin; 1249 /* do nondiagonal part */ 1250 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1251 /* send it on its way */ 1252 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1253 /* do local part */ 1254 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1255 /* receive remote parts: note this assumes the values are not actually */ 1256 /* inserted in yy until the next line, which is true for my implementation*/ 1257 /* but is not perhaps always true. */ 1258 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 /* 1263 This only works correctly for square matrices where the subblock A->A is the 1264 diagonal block 1265 */ 1266 #undef __FUNC__ 1267 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1268 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1269 { 1270 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1271 int ierr; 1272 1273 PetscFunctionBegin; 1274 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1275 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNC__ 1280 #define __FUNC__ "MatScale_MPIBAIJ" 1281 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1282 { 1283 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1284 int ierr; 1285 1286 PetscFunctionBegin; 1287 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1288 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNC__ 1293 #define __FUNC__ "MatGetSize_MPIBAIJ" 1294 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1295 { 1296 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1297 1298 PetscFunctionBegin; 1299 if (m) *m = mat->M; 1300 if (n) *n = mat->N; 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNC__ 1305 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1306 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1307 { 1308 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1309 1310 PetscFunctionBegin; 1311 *m = mat->m; *n = mat->n; 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNC__ 1316 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1317 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1318 { 1319 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1320 1321 PetscFunctionBegin; 1322 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1323 PetscFunctionReturn(0); 1324 } 1325 1326 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1327 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1328 1329 #undef __FUNC__ 1330 #define __FUNC__ "MatGetRow_MPIBAIJ" 1331 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1332 { 1333 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1334 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1335 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1336 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1337 int *cmap, *idx_p,cstart = mat->cstart; 1338 1339 PetscFunctionBegin; 1340 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1341 mat->getrowactive = PETSC_TRUE; 1342 1343 if (!mat->rowvalues && (idx || v)) { 1344 /* 1345 allocate enough space to hold information from the longest row. 1346 */ 1347 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1348 int max = 1,mbs = mat->mbs,tmp; 1349 for ( i=0; i<mbs; i++ ) { 1350 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1351 if (max < tmp) { max = tmp; } 1352 } 1353 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1354 CHKPTRQ(mat->rowvalues); 1355 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1356 } 1357 1358 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1359 lrow = row - brstart; 1360 1361 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1362 if (!v) {pvA = 0; pvB = 0;} 1363 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1364 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1365 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1366 nztot = nzA + nzB; 1367 1368 cmap = mat->garray; 1369 if (v || idx) { 1370 if (nztot) { 1371 /* Sort by increasing column numbers, assuming A and B already sorted */ 1372 int imark = -1; 1373 if (v) { 1374 *v = v_p = mat->rowvalues; 1375 for ( i=0; i<nzB; i++ ) { 1376 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1377 else break; 1378 } 1379 imark = i; 1380 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1381 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1382 } 1383 if (idx) { 1384 *idx = idx_p = mat->rowindices; 1385 if (imark > -1) { 1386 for ( i=0; i<imark; i++ ) { 1387 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1388 } 1389 } else { 1390 for ( i=0; i<nzB; i++ ) { 1391 if (cmap[cworkB[i]/bs] < cstart) 1392 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1393 else break; 1394 } 1395 imark = i; 1396 } 1397 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1398 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1399 } 1400 } else { 1401 if (idx) *idx = 0; 1402 if (v) *v = 0; 1403 } 1404 } 1405 *nz = nztot; 1406 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1407 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNC__ 1412 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1413 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1414 { 1415 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1416 1417 PetscFunctionBegin; 1418 if (baij->getrowactive == PETSC_FALSE) { 1419 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1420 } 1421 baij->getrowactive = PETSC_FALSE; 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNC__ 1426 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1427 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1428 { 1429 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1430 1431 PetscFunctionBegin; 1432 *bs = baij->bs; 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1438 int MatZeroEntries_MPIBAIJ(Mat A) 1439 { 1440 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1441 int ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1445 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNC__ 1450 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1451 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1452 { 1453 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1454 Mat A = a->A, B = a->B; 1455 int ierr; 1456 double isend[5], irecv[5]; 1457 1458 PetscFunctionBegin; 1459 info->block_size = (double)a->bs; 1460 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1461 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1462 isend[3] = info->memory; isend[4] = info->mallocs; 1463 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1464 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1465 isend[3] += info->memory; isend[4] += info->mallocs; 1466 if (flag == MAT_LOCAL) { 1467 info->nz_used = isend[0]; 1468 info->nz_allocated = isend[1]; 1469 info->nz_unneeded = isend[2]; 1470 info->memory = isend[3]; 1471 info->mallocs = isend[4]; 1472 } else if (flag == MAT_GLOBAL_MAX) { 1473 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1474 info->nz_used = irecv[0]; 1475 info->nz_allocated = irecv[1]; 1476 info->nz_unneeded = irecv[2]; 1477 info->memory = irecv[3]; 1478 info->mallocs = irecv[4]; 1479 } else if (flag == MAT_GLOBAL_SUM) { 1480 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1481 info->nz_used = irecv[0]; 1482 info->nz_allocated = irecv[1]; 1483 info->nz_unneeded = irecv[2]; 1484 info->memory = irecv[3]; 1485 info->mallocs = irecv[4]; 1486 } 1487 info->rows_global = (double)a->M; 1488 info->columns_global = (double)a->N; 1489 info->rows_local = (double)a->m; 1490 info->columns_local = (double)a->N; 1491 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1492 info->fill_ratio_needed = 0; 1493 info->factor_mallocs = 0; 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNC__ 1498 #define __FUNC__ "MatSetOption_MPIBAIJ" 1499 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1500 { 1501 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1502 1503 PetscFunctionBegin; 1504 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1505 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1506 op == MAT_COLUMNS_UNSORTED || 1507 op == MAT_COLUMNS_SORTED || 1508 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1509 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1510 MatSetOption(a->A,op); 1511 MatSetOption(a->B,op); 1512 } else if (op == MAT_ROW_ORIENTED) { 1513 a->roworiented = 1; 1514 MatSetOption(a->A,op); 1515 MatSetOption(a->B,op); 1516 } else if (op == MAT_ROWS_SORTED || 1517 op == MAT_ROWS_UNSORTED || 1518 op == MAT_SYMMETRIC || 1519 op == MAT_STRUCTURALLY_SYMMETRIC || 1520 op == MAT_YES_NEW_DIAGONALS || 1521 op == MAT_USE_HASH_TABLE) 1522 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1523 else if (op == MAT_COLUMN_ORIENTED) { 1524 a->roworiented = 0; 1525 MatSetOption(a->A,op); 1526 MatSetOption(a->B,op); 1527 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1528 a->donotstash = 1; 1529 } else if (op == MAT_NO_NEW_DIAGONALS) { 1530 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1531 } else if (op == MAT_USE_HASH_TABLE) { 1532 a->ht_flag = 1; 1533 } else { 1534 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNC__ 1540 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1541 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1542 { 1543 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1544 Mat_SeqBAIJ *Aloc; 1545 Mat B; 1546 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1547 int bs=baij->bs,mbs=baij->mbs; 1548 Scalar *a; 1549 1550 PetscFunctionBegin; 1551 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1552 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1553 CHKERRQ(ierr); 1554 1555 /* copy over the A part */ 1556 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1557 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1558 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1559 1560 for ( i=0; i<mbs; i++ ) { 1561 rvals[0] = bs*(baij->rstart + i); 1562 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1563 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1564 col = (baij->cstart+aj[j])*bs; 1565 for (k=0; k<bs; k++ ) { 1566 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1567 col++; a += bs; 1568 } 1569 } 1570 } 1571 /* copy over the B part */ 1572 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1573 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1574 for ( i=0; i<mbs; i++ ) { 1575 rvals[0] = bs*(baij->rstart + i); 1576 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1577 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1578 col = baij->garray[aj[j]]*bs; 1579 for (k=0; k<bs; k++ ) { 1580 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1581 col++; a += bs; 1582 } 1583 } 1584 } 1585 PetscFree(rvals); 1586 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1587 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1588 1589 if (matout != PETSC_NULL) { 1590 *matout = B; 1591 } else { 1592 PetscOps *Abops; 1593 MatOps Aops; 1594 1595 /* This isn't really an in-place transpose .... but free data structures from baij */ 1596 PetscFree(baij->rowners); 1597 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1598 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1599 #if defined (USE_CTABLE) 1600 if (baij->colmap) TableDelete(baij->colmap); 1601 #else 1602 if (baij->colmap) PetscFree(baij->colmap); 1603 #endif 1604 if (baij->garray) PetscFree(baij->garray); 1605 if (baij->lvec) VecDestroy(baij->lvec); 1606 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1607 PetscFree(baij); 1608 1609 /* 1610 This is horrible, horrible code. We need to keep the 1611 A pointers for the bops and ops but copy everything 1612 else from C. 1613 */ 1614 Abops = A->bops; 1615 Aops = A->ops; 1616 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1617 A->bops = Abops; 1618 A->ops = Aops; 1619 1620 PetscHeaderDestroy(B); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNC__ 1626 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1627 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1628 { 1629 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1630 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1631 int ierr,s1,s2,s3; 1632 1633 PetscFunctionBegin; 1634 if (ll) { 1635 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1636 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1637 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1638 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1639 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1640 } 1641 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNC__ 1646 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1647 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1648 { 1649 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1650 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1651 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1652 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1653 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1654 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1655 MPI_Comm comm = A->comm; 1656 MPI_Request *send_waits,*recv_waits; 1657 MPI_Status recv_status,*send_status; 1658 IS istmp; 1659 PetscTruth localdiag; 1660 1661 PetscFunctionBegin; 1662 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1663 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1664 1665 /* first count number of contributors to each processor */ 1666 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1667 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1668 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1669 for ( i=0; i<N; i++ ) { 1670 idx = rows[i]; 1671 found = 0; 1672 for ( j=0; j<size; j++ ) { 1673 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1674 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1675 } 1676 } 1677 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1678 } 1679 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1680 1681 /* inform other processors of number of messages and max length*/ 1682 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1683 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1684 nrecvs = work[rank]; 1685 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1686 nmax = work[rank]; 1687 PetscFree(work); 1688 1689 /* post receives: */ 1690 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1691 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1692 for ( i=0; i<nrecvs; i++ ) { 1693 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1694 } 1695 1696 /* do sends: 1697 1) starts[i] gives the starting index in svalues for stuff going to 1698 the ith processor 1699 */ 1700 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1701 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1702 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1703 starts[0] = 0; 1704 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1705 for ( i=0; i<N; i++ ) { 1706 svalues[starts[owner[i]]++] = rows[i]; 1707 } 1708 ISRestoreIndices(is,&rows); 1709 1710 starts[0] = 0; 1711 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1712 count = 0; 1713 for ( i=0; i<size; i++ ) { 1714 if (procs[i]) { 1715 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1716 } 1717 } 1718 PetscFree(starts); 1719 1720 base = owners[rank]*bs; 1721 1722 /* wait on receives */ 1723 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1724 source = lens + nrecvs; 1725 count = nrecvs; slen = 0; 1726 while (count) { 1727 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1728 /* unpack receives into our local space */ 1729 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1730 source[imdex] = recv_status.MPI_SOURCE; 1731 lens[imdex] = n; 1732 slen += n; 1733 count--; 1734 } 1735 PetscFree(recv_waits); 1736 1737 /* move the data into the send scatter */ 1738 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1739 count = 0; 1740 for ( i=0; i<nrecvs; i++ ) { 1741 values = rvalues + i*nmax; 1742 for ( j=0; j<lens[i]; j++ ) { 1743 lrows[count++] = values[j] - base; 1744 } 1745 } 1746 PetscFree(rvalues); PetscFree(lens); 1747 PetscFree(owner); PetscFree(nprocs); 1748 1749 /* actually zap the local rows */ 1750 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1751 PLogObjectParent(A,istmp); 1752 1753 /* 1754 Zero the required rows. If the "diagonal block" of the matrix 1755 is square and the user wishes to set the diagonal we use seperate 1756 code so that MatSetValues() is not called for each diagonal allocating 1757 new memory, thus calling lots of mallocs and slowing things down. 1758 1759 Contributed by: Mathew Knepley 1760 */ 1761 localdiag = PETSC_FALSE; 1762 if (diag && (l->A->M == l->A->N)) { 1763 localdiag = PETSC_TRUE; 1764 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1765 } else { 1766 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1767 } 1768 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1769 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1770 1771 if (diag && (localdiag == PETSC_FALSE)) { 1772 for ( i = 0; i < slen; i++ ) { 1773 row = lrows[i] + rstart_bs; 1774 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1775 } 1776 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1777 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1778 } 1779 PetscFree(lrows); 1780 1781 /* wait on sends */ 1782 if (nsends) { 1783 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1784 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1785 PetscFree(send_status); 1786 } 1787 PetscFree(send_waits); PetscFree(svalues); 1788 1789 PetscFunctionReturn(0); 1790 } 1791 1792 extern int MatPrintHelp_SeqBAIJ(Mat); 1793 #undef __FUNC__ 1794 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1795 int MatPrintHelp_MPIBAIJ(Mat A) 1796 { 1797 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1798 MPI_Comm comm = A->comm; 1799 static int called = 0; 1800 int ierr; 1801 1802 PetscFunctionBegin; 1803 if (!a->rank) { 1804 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1805 } 1806 if (called) {PetscFunctionReturn(0);} else called = 1; 1807 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1808 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNC__ 1813 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1814 int MatSetUnfactored_MPIBAIJ(Mat A) 1815 { 1816 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1817 int ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1825 1826 /* -------------------------------------------------------------------*/ 1827 static struct _MatOps MatOps_Values = { 1828 MatSetValues_MPIBAIJ, 1829 MatGetRow_MPIBAIJ, 1830 MatRestoreRow_MPIBAIJ, 1831 MatMult_MPIBAIJ, 1832 MatMultAdd_MPIBAIJ, 1833 MatMultTrans_MPIBAIJ, 1834 MatMultTransAdd_MPIBAIJ, 1835 0, 1836 0, 1837 0, 1838 0, 1839 0, 1840 0, 1841 0, 1842 MatTranspose_MPIBAIJ, 1843 MatGetInfo_MPIBAIJ, 1844 0, 1845 MatGetDiagonal_MPIBAIJ, 1846 MatDiagonalScale_MPIBAIJ, 1847 MatNorm_MPIBAIJ, 1848 MatAssemblyBegin_MPIBAIJ, 1849 MatAssemblyEnd_MPIBAIJ, 1850 0, 1851 MatSetOption_MPIBAIJ, 1852 MatZeroEntries_MPIBAIJ, 1853 MatZeroRows_MPIBAIJ, 1854 0, 1855 0, 1856 0, 1857 0, 1858 MatGetSize_MPIBAIJ, 1859 MatGetLocalSize_MPIBAIJ, 1860 MatGetOwnershipRange_MPIBAIJ, 1861 0, 1862 0, 1863 0, 1864 0, 1865 MatDuplicate_MPIBAIJ, 1866 0, 1867 0, 1868 0, 1869 0, 1870 0, 1871 MatGetSubMatrices_MPIBAIJ, 1872 MatIncreaseOverlap_MPIBAIJ, 1873 MatGetValues_MPIBAIJ, 1874 0, 1875 MatPrintHelp_MPIBAIJ, 1876 MatScale_MPIBAIJ, 1877 0, 1878 0, 1879 0, 1880 MatGetBlockSize_MPIBAIJ, 1881 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 0, 1887 MatSetUnfactored_MPIBAIJ, 1888 0, 1889 MatSetValuesBlocked_MPIBAIJ, 1890 0, 1891 0, 1892 0, 1893 MatGetMaps_Petsc}; 1894 1895 #include "pc.h" 1896 EXTERN_C_BEGIN 1897 extern int PCSetUp_BJacobi_BAIJ(PC); 1898 EXTERN_C_END 1899 1900 #undef __FUNC__ 1901 #define __FUNC__ "MatCreateMPIBAIJ" 1902 /*@C 1903 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1904 (block compressed row). For good matrix assembly performance 1905 the user should preallocate the matrix storage by setting the parameters 1906 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1907 performance can be increased by more than a factor of 50. 1908 1909 Collective on MPI_Comm 1910 1911 Input Parameters: 1912 + comm - MPI communicator 1913 . bs - size of blockk 1914 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1915 This value should be the same as the local size used in creating the 1916 y vector for the matrix-vector product y = Ax. 1917 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1918 This value should be the same as the local size used in creating the 1919 x vector for the matrix-vector product y = Ax. 1920 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1921 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1922 . d_nz - number of block nonzeros per block row in diagonal portion of local 1923 submatrix (same for all local rows) 1924 . d_nzz - array containing the number of block nonzeros in the various block rows 1925 of the in diagonal portion of the local (possibly different for each block 1926 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1927 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1928 submatrix (same for all local rows). 1929 - o_nzz - array containing the number of nonzeros in the various block rows of the 1930 off-diagonal portion of the local submatrix (possibly different for 1931 each block row) or PETSC_NULL. 1932 1933 Output Parameter: 1934 . A - the matrix 1935 1936 Options Database Keys: 1937 . -mat_no_unroll - uses code that does not unroll the loops in the 1938 block calculations (much slower) 1939 . -mat_block_size - size of the blocks to use 1940 . -mat_mpi - use the parallel matrix data structures even on one processor 1941 (defaults to using SeqBAIJ format on one processor) 1942 1943 Notes: 1944 The user MUST specify either the local or global matrix dimensions 1945 (possibly both). 1946 1947 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1948 than it must be used on all processors that share the object for that argument. 1949 1950 Storage Information: 1951 For a square global matrix we define each processor's diagonal portion 1952 to be its local rows and the corresponding columns (a square submatrix); 1953 each processor's off-diagonal portion encompasses the remainder of the 1954 local matrix (a rectangular submatrix). 1955 1956 The user can specify preallocated storage for the diagonal part of 1957 the local submatrix with either d_nz or d_nnz (not both). Set 1958 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1959 memory allocation. Likewise, specify preallocated storage for the 1960 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1961 1962 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1963 the figure below we depict these three local rows and all columns (0-11). 1964 1965 .vb 1966 0 1 2 3 4 5 6 7 8 9 10 11 1967 ------------------- 1968 row 3 | o o o d d d o o o o o o 1969 row 4 | o o o d d d o o o o o o 1970 row 5 | o o o d d d o o o o o o 1971 ------------------- 1972 .ve 1973 1974 Thus, any entries in the d locations are stored in the d (diagonal) 1975 submatrix, and any entries in the o locations are stored in the 1976 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1977 stored simply in the MATSEQBAIJ format for compressed row storage. 1978 1979 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1980 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1981 In general, for PDE problems in which most nonzeros are near the diagonal, 1982 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1983 or you will get TERRIBLE performance; see the users' manual chapter on 1984 matrices. 1985 1986 .keywords: matrix, block, aij, compressed row, sparse, parallel 1987 1988 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1989 @*/ 1990 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1991 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1992 { 1993 Mat B; 1994 Mat_MPIBAIJ *b; 1995 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1996 int flag1 = 0,flag2 = 0; 1997 1998 PetscFunctionBegin; 1999 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2000 2001 MPI_Comm_size(comm,&size); 2002 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 2003 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 2004 if (!flag1 && !flag2 && size == 1) { 2005 if (M == PETSC_DECIDE) M = m; 2006 if (N == PETSC_DECIDE) N = n; 2007 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 *A = 0; 2012 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2013 PLogObjectCreate(B); 2014 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 2015 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 2016 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 2017 2018 B->ops->destroy = MatDestroy_MPIBAIJ; 2019 B->ops->view = MatView_MPIBAIJ; 2020 B->mapping = 0; 2021 B->factor = 0; 2022 B->assembled = PETSC_FALSE; 2023 2024 B->insertmode = NOT_SET_VALUES; 2025 MPI_Comm_rank(comm,&b->rank); 2026 MPI_Comm_size(comm,&b->size); 2027 2028 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2029 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2030 } 2031 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2032 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2033 } 2034 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2035 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2036 } 2037 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2038 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2039 2040 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2041 work[0] = m; work[1] = n; 2042 mbs = m/bs; nbs = n/bs; 2043 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2044 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2045 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2046 } 2047 if (m == PETSC_DECIDE) { 2048 Mbs = M/bs; 2049 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2050 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2051 m = mbs*bs; 2052 } 2053 if (n == PETSC_DECIDE) { 2054 Nbs = N/bs; 2055 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2056 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2057 n = nbs*bs; 2058 } 2059 if (mbs*bs != m || nbs*bs != n) { 2060 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2061 } 2062 2063 b->m = m; B->m = m; 2064 b->n = n; B->n = n; 2065 b->N = N; B->N = N; 2066 b->M = M; B->M = M; 2067 b->bs = bs; 2068 b->bs2 = bs*bs; 2069 b->mbs = mbs; 2070 b->nbs = nbs; 2071 b->Mbs = Mbs; 2072 b->Nbs = Nbs; 2073 2074 /* the information in the maps duplicates the information computed below, eventually 2075 we should remove the duplicate information that is not contained in the maps */ 2076 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2077 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2078 2079 /* build local table of row and column ownerships */ 2080 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2081 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2082 b->cowners = b->rowners + b->size + 2; 2083 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2084 b->rowners[0] = 0; 2085 for ( i=2; i<=b->size; i++ ) { 2086 b->rowners[i] += b->rowners[i-1]; 2087 } 2088 b->rstart = b->rowners[b->rank]; 2089 b->rend = b->rowners[b->rank+1]; 2090 b->rstart_bs = b->rstart * bs; 2091 b->rend_bs = b->rend * bs; 2092 2093 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2094 b->cowners[0] = 0; 2095 for ( i=2; i<=b->size; i++ ) { 2096 b->cowners[i] += b->cowners[i-1]; 2097 } 2098 b->cstart = b->cowners[b->rank]; 2099 b->cend = b->cowners[b->rank+1]; 2100 b->cstart_bs = b->cstart * bs; 2101 b->cend_bs = b->cend * bs; 2102 2103 2104 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2105 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2106 PLogObjectParent(B,b->A); 2107 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2108 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2109 PLogObjectParent(B,b->B); 2110 2111 /* build cache for off array entries formed */ 2112 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 2113 b->donotstash = 0; 2114 b->colmap = 0; 2115 b->garray = 0; 2116 b->roworiented = 1; 2117 2118 /* stuff used in block assembly */ 2119 b->barray = 0; 2120 2121 /* stuff used for matrix vector multiply */ 2122 b->lvec = 0; 2123 b->Mvctx = 0; 2124 2125 /* stuff for MatGetRow() */ 2126 b->rowindices = 0; 2127 b->rowvalues = 0; 2128 b->getrowactive = PETSC_FALSE; 2129 2130 /* hash table stuff */ 2131 b->ht = 0; 2132 b->hd = 0; 2133 b->ht_size = 0; 2134 b->ht_flag = 0; 2135 b->ht_fact = 0; 2136 b->ht_total_ct = 0; 2137 b->ht_insert_ct = 0; 2138 2139 *A = B; 2140 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2141 if (flg) { 2142 double fact = 1.39; 2143 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2144 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2145 if (fact <= 1.0) fact = 1.39; 2146 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2147 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2148 } 2149 ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C", 2150 "PCSetUp_BJacobi_BAIJ", 2151 (void*)PCSetUp_BJacobi_BAIJ);CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNC__ 2156 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2157 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2158 { 2159 Mat mat; 2160 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2161 int ierr, len=0, flg; 2162 2163 PetscFunctionBegin; 2164 *newmat = 0; 2165 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2166 PLogObjectCreate(mat); 2167 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2168 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2169 mat->ops->destroy = MatDestroy_MPIBAIJ; 2170 mat->ops->view = MatView_MPIBAIJ; 2171 mat->factor = matin->factor; 2172 mat->assembled = PETSC_TRUE; 2173 2174 a->m = mat->m = oldmat->m; 2175 a->n = mat->n = oldmat->n; 2176 a->M = mat->M = oldmat->M; 2177 a->N = mat->N = oldmat->N; 2178 2179 a->bs = oldmat->bs; 2180 a->bs2 = oldmat->bs2; 2181 a->mbs = oldmat->mbs; 2182 a->nbs = oldmat->nbs; 2183 a->Mbs = oldmat->Mbs; 2184 a->Nbs = oldmat->Nbs; 2185 2186 a->rstart = oldmat->rstart; 2187 a->rend = oldmat->rend; 2188 a->cstart = oldmat->cstart; 2189 a->cend = oldmat->cend; 2190 a->size = oldmat->size; 2191 a->rank = oldmat->rank; 2192 mat->insertmode = NOT_SET_VALUES; 2193 a->rowvalues = 0; 2194 a->getrowactive = PETSC_FALSE; 2195 a->barray = 0; 2196 2197 /* hash table stuff */ 2198 a->ht = 0; 2199 a->hd = 0; 2200 a->ht_size = 0; 2201 a->ht_flag = oldmat->ht_flag; 2202 a->ht_fact = oldmat->ht_fact; 2203 a->ht_total_ct = 0; 2204 a->ht_insert_ct = 0; 2205 2206 2207 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2208 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2209 a->cowners = a->rowners + a->size + 2; 2210 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2211 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2212 if (oldmat->colmap) { 2213 #if defined (USE_CTABLE) 2214 ierr = TableCreateCopy( &a->colmap,oldmat->colmap ); CHKERRQ(ierr); 2215 #else 2216 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2217 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2218 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2219 #endif 2220 } else a->colmap = 0; 2221 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2222 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2223 PLogObjectMemory(mat,len*sizeof(int)); 2224 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2225 } else a->garray = 0; 2226 2227 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2228 PLogObjectParent(mat,a->lvec); 2229 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2230 2231 PLogObjectParent(mat,a->Mvctx); 2232 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2233 PLogObjectParent(mat,a->A); 2234 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2235 PLogObjectParent(mat,a->B); 2236 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2237 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2238 if (flg) { 2239 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2240 } 2241 *newmat = mat; 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #include "sys.h" 2246 2247 #undef __FUNC__ 2248 #define __FUNC__ "MatLoad_MPIBAIJ" 2249 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2250 { 2251 Mat A; 2252 int i, nz, ierr, j,rstart, rend, fd; 2253 Scalar *vals,*buf; 2254 MPI_Comm comm = ((PetscObject)viewer)->comm; 2255 MPI_Status status; 2256 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2257 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2258 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2259 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2260 int dcount,kmax,k,nzcount,tmp; 2261 2262 PetscFunctionBegin; 2263 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2264 2265 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2266 if (!rank) { 2267 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2268 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2269 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2270 if (header[3] < 0) { 2271 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2272 } 2273 } 2274 2275 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2276 M = header[1]; N = header[2]; 2277 2278 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2279 2280 /* 2281 This code adds extra rows to make sure the number of rows is 2282 divisible by the blocksize 2283 */ 2284 Mbs = M/bs; 2285 extra_rows = bs - M + bs*(Mbs); 2286 if (extra_rows == bs) extra_rows = 0; 2287 else Mbs++; 2288 if (extra_rows &&!rank) { 2289 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2290 } 2291 2292 /* determine ownership of all rows */ 2293 mbs = Mbs/size + ((Mbs % size) > rank); 2294 m = mbs * bs; 2295 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2296 browners = rowners + size + 1; 2297 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2298 rowners[0] = 0; 2299 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2300 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2301 rstart = rowners[rank]; 2302 rend = rowners[rank+1]; 2303 2304 /* distribute row lengths to all processors */ 2305 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2306 if (!rank) { 2307 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2308 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2309 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2310 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2311 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2312 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2313 PetscFree(sndcounts); 2314 } else { 2315 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2316 } 2317 2318 if (!rank) { 2319 /* calculate the number of nonzeros on each processor */ 2320 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2321 PetscMemzero(procsnz,size*sizeof(int)); 2322 for ( i=0; i<size; i++ ) { 2323 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2324 procsnz[i] += rowlengths[j]; 2325 } 2326 } 2327 PetscFree(rowlengths); 2328 2329 /* determine max buffer needed and allocate it */ 2330 maxnz = 0; 2331 for ( i=0; i<size; i++ ) { 2332 maxnz = PetscMax(maxnz,procsnz[i]); 2333 } 2334 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2335 2336 /* read in my part of the matrix column indices */ 2337 nz = procsnz[0]; 2338 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2339 mycols = ibuf; 2340 if (size == 1) nz -= extra_rows; 2341 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2342 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2343 2344 /* read in every ones (except the last) and ship off */ 2345 for ( i=1; i<size-1; i++ ) { 2346 nz = procsnz[i]; 2347 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2348 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2349 } 2350 /* read in the stuff for the last proc */ 2351 if ( size != 1 ) { 2352 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2353 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2354 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2355 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2356 } 2357 PetscFree(cols); 2358 } else { 2359 /* determine buffer space needed for message */ 2360 nz = 0; 2361 for ( i=0; i<m; i++ ) { 2362 nz += locrowlens[i]; 2363 } 2364 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2365 mycols = ibuf; 2366 /* receive message of column indices*/ 2367 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2368 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2369 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2370 } 2371 2372 /* loop over local rows, determining number of off diagonal entries */ 2373 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2374 odlens = dlens + (rend-rstart); 2375 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2376 PetscMemzero(mask,3*Mbs*sizeof(int)); 2377 masked1 = mask + Mbs; 2378 masked2 = masked1 + Mbs; 2379 rowcount = 0; nzcount = 0; 2380 for ( i=0; i<mbs; i++ ) { 2381 dcount = 0; 2382 odcount = 0; 2383 for ( j=0; j<bs; j++ ) { 2384 kmax = locrowlens[rowcount]; 2385 for ( k=0; k<kmax; k++ ) { 2386 tmp = mycols[nzcount++]/bs; 2387 if (!mask[tmp]) { 2388 mask[tmp] = 1; 2389 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2390 else masked1[dcount++] = tmp; 2391 } 2392 } 2393 rowcount++; 2394 } 2395 2396 dlens[i] = dcount; 2397 odlens[i] = odcount; 2398 2399 /* zero out the mask elements we set */ 2400 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2401 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2402 } 2403 2404 /* create our matrix */ 2405 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2406 CHKERRQ(ierr); 2407 A = *newmat; 2408 MatSetOption(A,MAT_COLUMNS_SORTED); 2409 2410 if (!rank) { 2411 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2412 /* read in my part of the matrix numerical values */ 2413 nz = procsnz[0]; 2414 vals = buf; 2415 mycols = ibuf; 2416 if (size == 1) nz -= extra_rows; 2417 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2418 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2419 2420 /* insert into matrix */ 2421 jj = rstart*bs; 2422 for ( i=0; i<m; i++ ) { 2423 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2424 mycols += locrowlens[i]; 2425 vals += locrowlens[i]; 2426 jj++; 2427 } 2428 /* read in other processors (except the last one) and ship out */ 2429 for ( i=1; i<size-1; i++ ) { 2430 nz = procsnz[i]; 2431 vals = buf; 2432 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2433 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2434 } 2435 /* the last proc */ 2436 if ( size != 1 ){ 2437 nz = procsnz[i] - extra_rows; 2438 vals = buf; 2439 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2440 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2441 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2442 } 2443 PetscFree(procsnz); 2444 } else { 2445 /* receive numeric values */ 2446 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2447 2448 /* receive message of values*/ 2449 vals = buf; 2450 mycols = ibuf; 2451 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2452 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2453 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2454 2455 /* insert into matrix */ 2456 jj = rstart*bs; 2457 for ( i=0; i<m; i++ ) { 2458 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2459 mycols += locrowlens[i]; 2460 vals += locrowlens[i]; 2461 jj++; 2462 } 2463 } 2464 PetscFree(locrowlens); 2465 PetscFree(buf); 2466 PetscFree(ibuf); 2467 PetscFree(rowners); 2468 PetscFree(dlens); 2469 PetscFree(mask); 2470 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2471 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 2476 2477 #undef __FUNC__ 2478 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2479 /*@ 2480 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2481 2482 Input Parameters: 2483 . mat - the matrix 2484 . fact - factor 2485 2486 Collective on Mat 2487 2488 Notes: 2489 This can also be set by the command line option: -mat_use_hash_table fact 2490 2491 .keywords: matrix, hashtable, factor, HT 2492 2493 .seealso: MatSetOption() 2494 @*/ 2495 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2496 { 2497 Mat_MPIBAIJ *baij; 2498 2499 PetscFunctionBegin; 2500 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2501 if (mat->type != MATMPIBAIJ) { 2502 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2503 } 2504 baij = (Mat_MPIBAIJ*) mat->data; 2505 baij->ht_fact = fact; 2506 PetscFunctionReturn(0); 2507 } 2508