#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.143 1999/01/11 16:22:31 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part (B) of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix (block) length.

   Entries are stored with a +1 offset so that 0 can mean "column not
   present locally"; readers subtract 1 (see MatSetValues_MPIBAIJ).
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* sparse table variant: map global block column garray[i]+1 -> i*bs+1 */
  ierr = TableCreate( &baij->colmap, baij->nbs/5 ); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd( baij->colmap, baij->garray[i] + 1, i*bs+1 );CHKERRQ(ierr);
  }
#else
  /* dense variant: one int per global block column; zeroed = "absent" */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block entries a row grows by on each reallocation */
#define CHUNKSIZE 10

/*
   Insert or add a single scalar at point-index (row,col) of the local
   on-diagonal block A.  Performs a binary then linear search of the block
   row; if the block column is absent and insertion is allowed, shifts the
   later entries up, growing the row storage by CHUNKSIZE blocks when full.
   Relies on caller-declared locals (rp,ap,bap,brow,bcol,ridx,cidx,low,
   high,t,_i,ii,N,nrow,rmax) and on a,aa,ai,aj,ailen,aimax being set up —
   see MatSetValues_MPIBAIJ below.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows to a short run, then linear scan */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value; \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one buffer holds a, j and i arrays */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, opening a CHUNKSIZE gap after brow */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Same as MatSetValues_SeqBAIJ_A_Private() but targets the off-diagonal
   block B (b,ba,bi,bj,bilen,bimax locals, b_noinsert label).
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value; \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

/*
   MatSetValues for the parallel BAIJ format, one scalar at a time
   (point indices, not block indices).  Entries in locally owned rows go
   into the on-diagonal block A or the off-diagonal block B via the macros
   above; rows owned by other processes are stashed for communication at
   assembly time.  If an off-diagonal column is not in colmap and new
   nonzeros are allowed, the matrix is disassembled first.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned locally */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column in the on-diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            col = TableFind( baij->colmap, in[j]/bs + 1 ) - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not yet present in B: revert to unassembled state */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for AssemblyBegin/End */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          /* column-oriented input: stash one value at a time */
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

/*
   MatSetValuesBlocked for the parallel BAIJ format: indices are block
   indices and v supplies dense bs x bs blocks.  Each block is copied
   (if necessary) into the scratch buffer baij->barray in the layout
   expected by MatSetValuesBlocked_SeqBAIJ, then routed to A or B;
   blocks for non-local rows are stashed entry-by-entry in point form.

   NOTE(review): unlike its siblings this routine has no
   PetscFunctionBegin before the first executable statement — confirm
   whether that is intentional.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  /* lazily allocate the bs x bs scratch block, reused across calls */
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  /* stride from the end of one block row/column of v to the next */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column lies in the on-diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* off-diagonal block B: translate global block column via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
#if defined (USE_CTABLE)
            /* colmap entries for block starts must be multiples of bs */
            if( (TableFind( baij->colmap, in[j] + 1 ) - 1) % bs)
              {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (USE_CTABLE)
            col = (TableFind( baij->colmap, in[j] + 1 ) - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* non-local block row: stash the block entry-by-entry in point form */
      if (!baij->donotstash) {
        if (roworiented ) {
          row = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hash using the golden ratio; tmp is caller scratch */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = \
  (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */

/*
   MatSetValues variant used once the hash table (baij->ht/hd) has been
   built at assembly time: each (block row, block col) is located by a
   hash lookup with linear probing (wrapping from the end of the table
   back to the start) instead of a search through the AIJ structure.
   New nonzero locations CANNOT be introduced here — a missing key is an
   error.  Non-local rows are stashed as in MatSetValues_MPIBAIJ.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table; key is 1-based so 0 = empty slot */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build also counts probe lengths for the assembly report */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the
           correct address (HD[idx] points at the bs x bs block storage,
           stored column-major within the block) */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
   Blocked (bs x bs at a time) analogue of MatSetValues_MPIBAIJ_HT:
   indices are block indices, the hash key is the block position, and
   the whole block pointed to by HD[idx] is updated in place.
*/
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* transpose row-oriented input into column-major block storage */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* non-local block row: stash entry-by-entry in point form */
      if (!baij->donotstash) {
        if (roworiented ) {
          row = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

/*
   MatGetValues for the parallel BAIJ format.  Only values in locally
   owned rows may be retrieved; requested entries in non-local rows are
   an error.  Off-diagonal columns are translated through colmap; a
   column not present in B yields 0.0.
*/
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* on-diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          data = TableFind( baij->colmap, idxn[j]/bs + 1 ) - 1;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent from B => value is an (unstored) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatNorm for the parallel BAIJ format.  On one process this defers to
   the sequential routine; in parallel only NORM_FROBENIUS is supported
   (sum of squares over A and B, reduced with MPI_Allreduce).
*/
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = \
  (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over the local on- and off-diagonal parts */
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatAssemblyBegin for the parallel BAIJ format: ships the stashed
   off-processor values to their owners.  Each message is a sequence of
   (row, col, value) triples encoded as three Scalars; receives are
   posted here and completed in MatAssemblyEnd_MPIBAIJ.
*/
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (baij->donotstash) {
    /* nothing was stashed: record an empty communication pattern */
    baij->svalues    = 0; baij->rvalues    = 0;
    baij->nsends     = 0; baij->nrecvs     = 0;
    baij->send_waits = 0; baij->recv_waits = 0;
    baij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}

/*
   Creates the hash table, and sets the table.
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   re-created.

   Builds baij->hd (pointers into the bs x bs block storage of A and B)
   and baij->ht (the matching keys, key = blockrow*Nbs + blockcol + 1,
   1-based so 0 marks an empty slot), sized factor * total local nonzero
   blocks, using linear probing for collisions.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(USE_PETSC_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  /* NOTE(review): ht_size is overwritten even when the table already
     exists and we return early below — confirm this is intended */
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: hd and ht share a single malloc */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD = baij->hd;
  HT = baij->ht;


  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));


  /* Loop Over A: global block column is local column + cstart */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: global block column comes from garray */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(USE_PETSC_BOPT_g)
  for ( i=0,j=0; i<size; i++)
    if (HT[i]) {j++;}
  PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
           (j== 0)? 0.0:((double)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}

/*
   MatAssemblyEnd for the parallel BAIJ format: completes the receives
   posted in MatAssemblyBegin and inserts the received (row,col,value)
   triples into A or B, then assembles the two sequential blocks.  If any
   process disassembled, all must.  On final assembly it optionally
   builds the hash table and switches the SetValues function pointers to
   the _HT variants.
*/
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
  int         bs=baij->bs,row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3; /* three Scalars (row, col, value) per entry */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      } else {
        if (mat->was_assembled) {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          col = TableFind( baij->colmap, col/bs + 1 ) - 1 + col%bs;
#else
          col = (baij->colmap[col/bs]) - 1 + col%bs;
#endif
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      }
    }
    count--;
  }
  if (baij->recv_waits) PetscFree(baij->recv_waits);
  if (baij->rvalues) PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (baij->send_waits) PetscFree(baij->send_waits);
  if (baij->svalues) PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* switch future MatSetValues calls to the fast hash-table path */
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked \
mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 990 } 991 992 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNC__ 997 #define __FUNC__ "MatView_MPIBAIJ_Binary" 998 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 999 { 1000 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1001 int ierr; 1002 1003 PetscFunctionBegin; 1004 if (baij->size == 1) { 1005 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1006 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNC__ 1011 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1012 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 1013 { 1014 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1015 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 1016 FILE *fd; 1017 ViewerType vtype; 1018 1019 PetscFunctionBegin; 1020 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1021 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 1022 ierr = ViewerGetFormat(viewer,&format); 1023 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1024 MatInfo info; 1025 MPI_Comm_rank(mat->comm,&rank); 1026 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1027 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 1028 PetscSequentialPhaseBegin(mat->comm,1); 1029 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1030 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1031 baij->bs,(int)info.memory); 1032 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 1033 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1034 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 1035 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1036 fflush(fd); 1037 PetscSequentialPhaseEnd(mat->comm,1); 1038 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } else if (format == 
VIEWER_FORMAT_ASCII_INFO) { 1041 PetscPrintf(mat->comm," block size is %d\n",bs); 1042 PetscFunctionReturn(0); 1043 } 1044 } 1045 1046 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 1047 Draw draw; 1048 PetscTruth isnull; 1049 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 1050 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1051 } 1052 1053 if (size == 1) { 1054 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1055 } else { 1056 /* assemble the entire matrix onto first processor. */ 1057 Mat A; 1058 Mat_SeqBAIJ *Aloc; 1059 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 1060 int mbs = baij->mbs; 1061 Scalar *a; 1062 1063 if (!rank) { 1064 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1065 } else { 1066 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1067 } 1068 PLogObjectParent(mat,A); 1069 1070 /* copy over the A part */ 1071 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1072 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1073 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1074 1075 for ( i=0; i<mbs; i++ ) { 1076 rvals[0] = bs*(baij->rstart + i); 1077 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1078 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1079 col = (baij->cstart+aj[j])*bs; 1080 for (k=0; k<bs; k++ ) { 1081 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1082 col++; a += bs; 1083 } 1084 } 1085 } 1086 /* copy over the B part */ 1087 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1088 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1089 for ( i=0; i<mbs; i++ ) { 1090 rvals[0] = bs*(baij->rstart + i); 1091 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1092 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1093 col = baij->garray[aj[j]]*bs; 1094 for (k=0; k<bs; k++ ) { 1095 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1096 col++; a += bs; 1097 } 1098 } 1099 } 1100 PetscFree(rvals); 1101 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1102 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1103 /* 1104 Everyone has to call to draw the matrix since the graphics waits are 1105 synchronized across all processors that share the Draw object 1106 */ 1107 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1108 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1109 } 1110 ierr = MatDestroy(A); CHKERRQ(ierr); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 1116 1117 #undef __FUNC__ 1118 #define __FUNC__ "MatView_MPIBAIJ" 1119 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1120 { 1121 int ierr; 1122 ViewerType vtype; 1123 1124 PetscFunctionBegin; 1125 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1126 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1127 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1128 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 1129 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1130 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1131 } else { 1132 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1133 } 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNC__ 1138 #define __FUNC__ "MatDestroy_MPIBAIJ" 1139 int MatDestroy_MPIBAIJ(Mat mat) 1140 { 1141 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1142 int ierr; 1143 1144 PetscFunctionBegin; 1145 if (--mat->refct > 0) PetscFunctionReturn(0); 1146 1147 if (mat->mapping) { 1148 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 1149 } 1150 if (mat->bmapping) { 1151 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 1152 } 1153 if (mat->rmap) { 1154 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1155 } 1156 if (mat->cmap) { 1157 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1158 } 1159 #if defined(USE_PETSC_LOG) 1160 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1161 #endif 1162 1163 ierr = 
StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1164 PetscFree(baij->rowners); 1165 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1166 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1167 #if defined (USE_CTABLE) 1168 if (baij->colmap) TableDelete(baij->colmap); 1169 #else 1170 if (baij->colmap) PetscFree(baij->colmap); 1171 #endif 1172 if (baij->garray) PetscFree(baij->garray); 1173 if (baij->lvec) VecDestroy(baij->lvec); 1174 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1175 if (baij->rowvalues) PetscFree(baij->rowvalues); 1176 if (baij->barray) PetscFree(baij->barray); 1177 if (baij->hd) PetscFree(baij->hd); 1178 PetscFree(baij); 1179 PLogObjectDestroy(mat); 1180 PetscHeaderDestroy(mat); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNC__ 1185 #define __FUNC__ "MatMult_MPIBAIJ" 1186 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1187 { 1188 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1189 int ierr, nt; 1190 1191 PetscFunctionBegin; 1192 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1193 if (nt != a->n) { 1194 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1195 } 1196 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1197 if (nt != a->m) { 1198 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1199 } 1200 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1201 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1202 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1203 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1204 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNC__ 1209 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1210 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1211 { 1212 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1213 int ierr; 1214 1215 PetscFunctionBegin; 1216 ierr = 
VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultTrans_MPIBAIJ - Computes yy = A^T * xx.

   The off-diagonal part B^T*xx is computed into the local work vector,
   scattered (reverse mode, adding) toward its owners while the local
   A^T*xx is computed, then the remote contributions are added into yy.
*/
#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIBAIJ"
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMultTransAdd_MPIBAIJ - Computes zz = yy + A^T * xx, using the same
   reverse-scatter overlap as MatMultTrans_MPIBAIJ.
*/
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatScale_MPIBAIJ - Scales the matrix by *aa; both the diagonal (A) and
   off-diagonal (B) local blocks are scaled.
*/
#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatGetSize_MPIBAIJ - Returns the global dimensions; either output may be null. */
#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

/* MatGetLocalSize_MPIBAIJ - Returns this process' local dimensions. */
#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

/*
   MatGetOwnershipRange_MPIBAIJ - Returns the (point, not block) row range
   owned locally: rstart/rend are block indices, hence the scaling by bs.
*/
#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int
MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);

/*
   MatGetRow_MPIBAIJ - Returns one (local) row of the matrix in global
   column numbering, merging the diagonal (A) and off-diagonal (B) parts.
   Only rows owned by this process may be requested.
*/
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one buffer holds both values and indices; freed in MatAssemblyEnd/MatDestroy */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark is the index in B's row where columns cross from left of the
         diagonal block to right of it; A's entries are spliced in between */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatRestoreRow_MPIBAIJ - Clears the "row checked out" flag set by MatGetRow. */
#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* MatGetBlockSize_MPIBAIJ - Returns the block size bs. */
#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

/* MatZeroEntries_MPIBAIJ - Zeros both local blocks, preserving the nonzero pattern. */
#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1452 #undef __FUNC__ 1453 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1454 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1455 { 1456 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1457 Mat A = a->A, B = a->B; 1458 int ierr; 1459 double isend[5], irecv[5]; 1460 1461 PetscFunctionBegin; 1462 info->block_size = (double)a->bs; 1463 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1464 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1465 isend[3] = info->memory; isend[4] = info->mallocs; 1466 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1467 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1468 isend[3] += info->memory; isend[4] += info->mallocs; 1469 if (flag == MAT_LOCAL) { 1470 info->nz_used = isend[0]; 1471 info->nz_allocated = isend[1]; 1472 info->nz_unneeded = isend[2]; 1473 info->memory = isend[3]; 1474 info->mallocs = isend[4]; 1475 } else if (flag == MAT_GLOBAL_MAX) { 1476 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1477 info->nz_used = irecv[0]; 1478 info->nz_allocated = irecv[1]; 1479 info->nz_unneeded = irecv[2]; 1480 info->memory = irecv[3]; 1481 info->mallocs = irecv[4]; 1482 } else if (flag == MAT_GLOBAL_SUM) { 1483 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1484 info->nz_used = irecv[0]; 1485 info->nz_allocated = irecv[1]; 1486 info->nz_unneeded = irecv[2]; 1487 info->memory = irecv[3]; 1488 info->mallocs = irecv[4]; 1489 } 1490 info->rows_global = (double)a->M; 1491 info->columns_global = (double)a->N; 1492 info->rows_local = (double)a->m; 1493 info->columns_local = (double)a->N; 1494 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1495 info->fill_ratio_needed = 0; 1496 info->factor_mallocs = 0; 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNC__ 1501 #define __FUNC__ "MatSetOption_MPIBAIJ" 1502 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1503 { 1504 
Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1505 1506 PetscFunctionBegin; 1507 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1508 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1509 op == MAT_COLUMNS_UNSORTED || 1510 op == MAT_COLUMNS_SORTED || 1511 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1512 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1513 MatSetOption(a->A,op); 1514 MatSetOption(a->B,op); 1515 } else if (op == MAT_ROW_ORIENTED) { 1516 a->roworiented = 1; 1517 MatSetOption(a->A,op); 1518 MatSetOption(a->B,op); 1519 } else if (op == MAT_ROWS_SORTED || 1520 op == MAT_ROWS_UNSORTED || 1521 op == MAT_SYMMETRIC || 1522 op == MAT_STRUCTURALLY_SYMMETRIC || 1523 op == MAT_YES_NEW_DIAGONALS || 1524 op == MAT_USE_HASH_TABLE) 1525 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1526 else if (op == MAT_COLUMN_ORIENTED) { 1527 a->roworiented = 0; 1528 MatSetOption(a->A,op); 1529 MatSetOption(a->B,op); 1530 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1531 a->donotstash = 1; 1532 } else if (op == MAT_NO_NEW_DIAGONALS) { 1533 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1534 } else if (op == MAT_USE_HASH_TABLE) { 1535 a->ht_flag = 1; 1536 } else { 1537 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1538 } 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNC__ 1543 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1544 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1545 { 1546 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1547 Mat_SeqBAIJ *Aloc; 1548 Mat B; 1549 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1550 int bs=baij->bs,mbs=baij->mbs; 1551 Scalar *a; 1552 1553 PetscFunctionBegin; 1554 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1555 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1556 CHKERRQ(ierr); 1557 1558 /* copy over the A part */ 1559 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1560 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1561 rvals = (int *) 
PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        /* insert one column of the bs x bs block as a row of B (transpose) */
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part; garray maps local off-diagonal block columns to global */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  PetscFree(rvals);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    PetscFree(baij->rowners);
    ierr = MatDestroy(baij->A); CHKERRQ(ierr);
    ierr = MatDestroy(baij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (baij->colmap) TableDelete(baij->colmap);
#else
    if (baij->colmap) PetscFree(baij->colmap);
#endif
    if (baij->garray) PetscFree(baij->garray);
    if (baij->lvec) VecDestroy(baij->lvec);
    if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
    PetscFree(baij);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIBAIJ - Scales rows by ll (applied to both local
   blocks); right (column) scaling is not supported.
*/
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
{
  Mat a = ((Mat_MPIBAIJ *) A->data)->A;
  Mat b = ((Mat_MPIBAIJ *) A->data)->B;
  int ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
    ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
    ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
    ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
  }
  if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
  PetscFunctionReturn(0);
}

/*
   MatZeroRows_MPIBAIJ - Zeros the (global, point-numbered) rows listed in
   is; each process may list any rows, so the requests are first routed to
   the owning processes with point-to-point messages, then applied locally.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds block-row ranges, hence the scaling by bs */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (the loop above advanced the entries past each bucket) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank]*bs;

  /*  wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* slow path: diagonal entries set one at a time through MatSetValues */
  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

extern int MatPrintHelp_SeqBAIJ(Mat);
/*
   MatPrintHelp_MPIBAIJ - Prints the runtime options for this matrix type
   (once per run, on the first process only).
*/
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
  (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
  PetscFunctionReturn(0);
}

/* MatSetUnfactored_MPIBAIJ - Resets the factored state of the local diagonal block. */
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr =
MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

/* -------------------------------------------------------------------*/
/* Function table for the MATMPIBAIJ type; entry POSITIONS are fixed by
   struct _MatOps, so the zeros (unimplemented operations) must stay put. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};

#include "pc.h"
EXTERN_C_BEGIN
extern int PCSetUp_BJacobi_BAIJ(PC);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).
1949 1950 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1951 than it must be used on all processors that share the object for that argument. 1952 1953 Storage Information: 1954 For a square global matrix we define each processor's diagonal portion 1955 to be its local rows and the corresponding columns (a square submatrix); 1956 each processor's off-diagonal portion encompasses the remainder of the 1957 local matrix (a rectangular submatrix). 1958 1959 The user can specify preallocated storage for the diagonal part of 1960 the local submatrix with either d_nz or d_nnz (not both). Set 1961 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1962 memory allocation. Likewise, specify preallocated storage for the 1963 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1964 1965 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1966 the figure below we depict these three local rows and all columns (0-11). 1967 1968 .vb 1969 0 1 2 3 4 5 6 7 8 9 10 11 1970 ------------------- 1971 row 3 | o o o d d d o o o o o o 1972 row 4 | o o o d d d o o o o o o 1973 row 5 | o o o d d d o o o o o o 1974 ------------------- 1975 .ve 1976 1977 Thus, any entries in the d locations are stored in the d (diagonal) 1978 submatrix, and any entries in the o locations are stored in the 1979 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1980 stored simply in the MATSEQBAIJ format for compressed row storage. 1981 1982 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1983 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1984 In general, for PDE problems in which most nonzeros are near the diagonal, 1985 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1986 or you will get TERRIBLE performance; see the users' manual chapter on 1987 matrices. 
1988 1989 .keywords: matrix, block, aij, compressed row, sparse, parallel 1990 1991 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1992 @*/ 1993 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1994 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1995 { 1996 Mat B; 1997 Mat_MPIBAIJ *b; 1998 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1999 int flag1 = 0,flag2 = 0; 2000 2001 PetscFunctionBegin; 2002 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2003 2004 MPI_Comm_size(comm,&size); 2005 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr); 2006 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 2007 if (!flag1 && !flag2 && size == 1) { 2008 if (M == PETSC_DECIDE) M = m; 2009 if (N == PETSC_DECIDE) N = n; 2010 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 *A = 0; 2015 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2016 PLogObjectCreate(B); 2017 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 2018 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 2019 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 2020 2021 B->ops->destroy = MatDestroy_MPIBAIJ; 2022 B->ops->view = MatView_MPIBAIJ; 2023 B->mapping = 0; 2024 B->factor = 0; 2025 B->assembled = PETSC_FALSE; 2026 2027 B->insertmode = NOT_SET_VALUES; 2028 MPI_Comm_rank(comm,&b->rank); 2029 MPI_Comm_size(comm,&b->size); 2030 2031 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2032 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2033 } 2034 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2035 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2036 } 2037 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2038 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either 
N or n should be specified"); 2039 } 2040 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2041 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2042 2043 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2044 work[0] = m; work[1] = n; 2045 mbs = m/bs; nbs = n/bs; 2046 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2047 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2048 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2049 } 2050 if (m == PETSC_DECIDE) { 2051 Mbs = M/bs; 2052 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2053 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2054 m = mbs*bs; 2055 } 2056 if (n == PETSC_DECIDE) { 2057 Nbs = N/bs; 2058 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2059 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2060 n = nbs*bs; 2061 } 2062 if (mbs*bs != m || nbs*bs != n) { 2063 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2064 } 2065 2066 b->m = m; B->m = m; 2067 b->n = n; B->n = n; 2068 b->N = N; B->N = N; 2069 b->M = M; B->M = M; 2070 b->bs = bs; 2071 b->bs2 = bs*bs; 2072 b->mbs = mbs; 2073 b->nbs = nbs; 2074 b->Mbs = Mbs; 2075 b->Nbs = Nbs; 2076 2077 /* the information in the maps duplicates the information computed below, eventually 2078 we should remove the duplicate information that is not contained in the maps */ 2079 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2080 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2081 2082 /* build local table of row and column ownerships */ 2083 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 2084 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2085 b->cowners = b->rowners + b->size + 2; 2086 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2087 b->rowners[0] = 0; 2088 for ( 
i=2; i<=b->size; i++ ) { 2089 b->rowners[i] += b->rowners[i-1]; 2090 } 2091 b->rstart = b->rowners[b->rank]; 2092 b->rend = b->rowners[b->rank+1]; 2093 b->rstart_bs = b->rstart * bs; 2094 b->rend_bs = b->rend * bs; 2095 2096 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2097 b->cowners[0] = 0; 2098 for ( i=2; i<=b->size; i++ ) { 2099 b->cowners[i] += b->cowners[i-1]; 2100 } 2101 b->cstart = b->cowners[b->rank]; 2102 b->cend = b->cowners[b->rank+1]; 2103 b->cstart_bs = b->cstart * bs; 2104 b->cend_bs = b->cend * bs; 2105 2106 2107 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2108 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2109 PLogObjectParent(B,b->A); 2110 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2111 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2112 PLogObjectParent(B,b->B); 2113 2114 /* build cache for off array entries formed */ 2115 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 2116 b->donotstash = 0; 2117 b->colmap = 0; 2118 b->garray = 0; 2119 b->roworiented = 1; 2120 2121 /* stuff used in block assembly */ 2122 b->barray = 0; 2123 2124 /* stuff used for matrix vector multiply */ 2125 b->lvec = 0; 2126 b->Mvctx = 0; 2127 2128 /* stuff for MatGetRow() */ 2129 b->rowindices = 0; 2130 b->rowvalues = 0; 2131 b->getrowactive = PETSC_FALSE; 2132 2133 /* hash table stuff */ 2134 b->ht = 0; 2135 b->hd = 0; 2136 b->ht_size = 0; 2137 b->ht_flag = 0; 2138 b->ht_fact = 0; 2139 b->ht_total_ct = 0; 2140 b->ht_insert_ct = 0; 2141 2142 *A = B; 2143 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr); 2144 if (flg) { 2145 double fact = 1.39; 2146 ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr); 2147 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr); 2148 if (fact <= 1.0) fact = 1.39; 2149 ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr); 2150 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used 
%5.2f\n",fact); 2151 } 2152 ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C", 2153 "PCSetUp_BJacobi_BAIJ", 2154 (void*)PCSetUp_BJacobi_BAIJ);CHKERRQ(ierr); 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNC__ 2159 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2160 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2161 { 2162 Mat mat; 2163 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2164 int ierr, len=0, flg; 2165 2166 PetscFunctionBegin; 2167 *newmat = 0; 2168 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2169 PLogObjectCreate(mat); 2170 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 2171 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2172 mat->ops->destroy = MatDestroy_MPIBAIJ; 2173 mat->ops->view = MatView_MPIBAIJ; 2174 mat->factor = matin->factor; 2175 mat->assembled = PETSC_TRUE; 2176 2177 a->m = mat->m = oldmat->m; 2178 a->n = mat->n = oldmat->n; 2179 a->M = mat->M = oldmat->M; 2180 a->N = mat->N = oldmat->N; 2181 2182 a->bs = oldmat->bs; 2183 a->bs2 = oldmat->bs2; 2184 a->mbs = oldmat->mbs; 2185 a->nbs = oldmat->nbs; 2186 a->Mbs = oldmat->Mbs; 2187 a->Nbs = oldmat->Nbs; 2188 2189 a->rstart = oldmat->rstart; 2190 a->rend = oldmat->rend; 2191 a->cstart = oldmat->cstart; 2192 a->cend = oldmat->cend; 2193 a->size = oldmat->size; 2194 a->rank = oldmat->rank; 2195 mat->insertmode = NOT_SET_VALUES; 2196 a->rowvalues = 0; 2197 a->getrowactive = PETSC_FALSE; 2198 a->barray = 0; 2199 2200 /* hash table stuff */ 2201 a->ht = 0; 2202 a->hd = 0; 2203 a->ht_size = 0; 2204 a->ht_flag = oldmat->ht_flag; 2205 a->ht_fact = oldmat->ht_fact; 2206 a->ht_total_ct = 0; 2207 a->ht_insert_ct = 0; 2208 2209 2210 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2211 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2212 a->cowners = a->rowners + a->size + 2; 
2213 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2214 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2215 if (oldmat->colmap) { 2216 #if defined (USE_CTABLE) 2217 ierr = TableCreateCopy( &a->colmap,oldmat->colmap ); CHKERRQ(ierr); 2218 #else 2219 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2220 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2221 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2222 #endif 2223 } else a->colmap = 0; 2224 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2225 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2226 PLogObjectMemory(mat,len*sizeof(int)); 2227 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2228 } else a->garray = 0; 2229 2230 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2231 PLogObjectParent(mat,a->lvec); 2232 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2233 2234 PLogObjectParent(mat,a->Mvctx); 2235 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2236 PLogObjectParent(mat,a->A); 2237 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2238 PLogObjectParent(mat,a->B); 2239 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2240 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2241 if (flg) { 2242 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2243 } 2244 *newmat = mat; 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #include "sys.h" 2249 2250 #undef __FUNC__ 2251 #define __FUNC__ "MatLoad_MPIBAIJ" 2252 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2253 { 2254 Mat A; 2255 int i, nz, ierr, j,rstart, rend, fd; 2256 Scalar *vals,*buf; 2257 MPI_Comm comm = ((PetscObject)viewer)->comm; 2258 MPI_Status status; 2259 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2260 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2261 int flg,tag = 
((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2262 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2263 int dcount,kmax,k,nzcount,tmp; 2264 2265 PetscFunctionBegin; 2266 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2267 2268 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2269 if (!rank) { 2270 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2271 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2272 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2273 if (header[3] < 0) { 2274 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2275 } 2276 } 2277 2278 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2279 M = header[1]; N = header[2]; 2280 2281 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2282 2283 /* 2284 This code adds extra rows to make sure the number of rows is 2285 divisible by the blocksize 2286 */ 2287 Mbs = M/bs; 2288 extra_rows = bs - M + bs*(Mbs); 2289 if (extra_rows == bs) extra_rows = 0; 2290 else Mbs++; 2291 if (extra_rows &&!rank) { 2292 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2293 } 2294 2295 /* determine ownership of all rows */ 2296 mbs = Mbs/size + ((Mbs % size) > rank); 2297 m = mbs * bs; 2298 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2299 browners = rowners + size + 1; 2300 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2301 rowners[0] = 0; 2302 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2303 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2304 rstart = rowners[rank]; 2305 rend = rowners[rank+1]; 2306 2307 /* distribute row lengths to all processors */ 2308 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2309 if (!rank) { 2310 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); 
CHKPTRQ(rowlengths); 2311 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2312 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2313 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2314 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2315 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2316 PetscFree(sndcounts); 2317 } else { 2318 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2319 } 2320 2321 if (!rank) { 2322 /* calculate the number of nonzeros on each processor */ 2323 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2324 PetscMemzero(procsnz,size*sizeof(int)); 2325 for ( i=0; i<size; i++ ) { 2326 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2327 procsnz[i] += rowlengths[j]; 2328 } 2329 } 2330 PetscFree(rowlengths); 2331 2332 /* determine max buffer needed and allocate it */ 2333 maxnz = 0; 2334 for ( i=0; i<size; i++ ) { 2335 maxnz = PetscMax(maxnz,procsnz[i]); 2336 } 2337 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2338 2339 /* read in my part of the matrix column indices */ 2340 nz = procsnz[0]; 2341 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2342 mycols = ibuf; 2343 if (size == 1) nz -= extra_rows; 2344 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2345 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2346 2347 /* read in every ones (except the last) and ship off */ 2348 for ( i=1; i<size-1; i++ ) { 2349 nz = procsnz[i]; 2350 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2351 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2352 } 2353 /* read in the stuff for the last proc */ 2354 if ( size != 1 ) { 2355 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2356 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2357 for ( 
i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2358 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2359 } 2360 PetscFree(cols); 2361 } else { 2362 /* determine buffer space needed for message */ 2363 nz = 0; 2364 for ( i=0; i<m; i++ ) { 2365 nz += locrowlens[i]; 2366 } 2367 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2368 mycols = ibuf; 2369 /* receive message of column indices*/ 2370 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2371 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2372 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2373 } 2374 2375 /* loop over local rows, determining number of off diagonal entries */ 2376 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2377 odlens = dlens + (rend-rstart); 2378 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2379 PetscMemzero(mask,3*Mbs*sizeof(int)); 2380 masked1 = mask + Mbs; 2381 masked2 = masked1 + Mbs; 2382 rowcount = 0; nzcount = 0; 2383 for ( i=0; i<mbs; i++ ) { 2384 dcount = 0; 2385 odcount = 0; 2386 for ( j=0; j<bs; j++ ) { 2387 kmax = locrowlens[rowcount]; 2388 for ( k=0; k<kmax; k++ ) { 2389 tmp = mycols[nzcount++]/bs; 2390 if (!mask[tmp]) { 2391 mask[tmp] = 1; 2392 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2393 else masked1[dcount++] = tmp; 2394 } 2395 } 2396 rowcount++; 2397 } 2398 2399 dlens[i] = dcount; 2400 odlens[i] = odcount; 2401 2402 /* zero out the mask elements we set */ 2403 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2404 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2405 } 2406 2407 /* create our matrix */ 2408 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2409 CHKERRQ(ierr); 2410 A = *newmat; 2411 MatSetOption(A,MAT_COLUMNS_SORTED); 2412 2413 if (!rank) { 2414 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2415 /* read in my part of 
the matrix numerical values */ 2416 nz = procsnz[0]; 2417 vals = buf; 2418 mycols = ibuf; 2419 if (size == 1) nz -= extra_rows; 2420 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2421 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2422 2423 /* insert into matrix */ 2424 jj = rstart*bs; 2425 for ( i=0; i<m; i++ ) { 2426 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2427 mycols += locrowlens[i]; 2428 vals += locrowlens[i]; 2429 jj++; 2430 } 2431 /* read in other processors (except the last one) and ship out */ 2432 for ( i=1; i<size-1; i++ ) { 2433 nz = procsnz[i]; 2434 vals = buf; 2435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2436 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2437 } 2438 /* the last proc */ 2439 if ( size != 1 ){ 2440 nz = procsnz[i] - extra_rows; 2441 vals = buf; 2442 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2443 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2444 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2445 } 2446 PetscFree(procsnz); 2447 } else { 2448 /* receive numeric values */ 2449 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2450 2451 /* receive message of values*/ 2452 vals = buf; 2453 mycols = ibuf; 2454 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2455 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2456 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2457 2458 /* insert into matrix */ 2459 jj = rstart*bs; 2460 for ( i=0; i<m; i++ ) { 2461 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2462 mycols += locrowlens[i]; 2463 vals += locrowlens[i]; 2464 jj++; 2465 } 2466 } 2467 PetscFree(locrowlens); 2468 PetscFree(buf); 2469 PetscFree(ibuf); 2470 PetscFree(rowners); 2471 PetscFree(dlens); 2472 PetscFree(mask); 2473 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2474 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 2479 2480 #undef __FUNC__ 2481 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2482 /*@ 2483 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2484 2485 Input Parameters: 2486 . mat - the matrix 2487 . fact - factor 2488 2489 Collective on Mat 2490 2491 Notes: 2492 This can also be set by the command line option: -mat_use_hash_table fact 2493 2494 .keywords: matrix, hashtable, factor, HT 2495 2496 .seealso: MatSetOption() 2497 @*/ 2498 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2499 { 2500 Mat_MPIBAIJ *baij; 2501 2502 PetscFunctionBegin; 2503 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2504 if (mat->type != MATMPIBAIJ) { 2505 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2506 } 2507 baij = (Mat_MPIBAIJ*) mat->data; 2508 baij->ht_fact = fact; 2509 PetscFunctionReturn(0); 2510 } 2511