#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.100 1998/01/27 01:23:50 balay Exp balay $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/baij/mpi/mpibaij.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs = B->bs;

  PetscFunctionBegin;
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for ( _i=low; _i<high; _i++ ) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1) goto a_noinsert; \
  else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
    \
    if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
    \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + bs2*new_nz); \
    new_i = new_j + new_nz; \
    \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
    PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
    PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
    PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
    PetscFree(a->a); \
    if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = 1; \
    \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
    PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
    a->maxnz += bs2*CHUNKSIZE; \
    a->reallocs++; \
    a->nz++; \
  } \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
  } \
  if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  a_noinsert:; \
  ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for ( _i=low; _i<high; _i++ ) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1) goto b_noinsert; \
  else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
    \
    if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
    \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + bs2*new_nz); \
    new_i = new_j + new_nz; \
    \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
    for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
    PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
    PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
    PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
    PetscFree(b->a); \
    if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = 1; \
    \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
    PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
    b->maxnz += bs2*CHUNKSIZE; \
    b->reallocs++; \
    b->nz++; \
  } \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
  } \
  if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
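  /*
     Overview (added for clarity): a call such as

        int row = 5, col = 9;                       (hypothetical values)
        Scalar one = 1.0;
        ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);

     arrives here through mat->ops.setvalues, and each entry (im[i],in[j])
     takes one of three paths:
       1) row local and column in [cstart_bs,cend_bs): stored in the
          diagonal block baij->A;
       2) row local, column elsewhere: stored in the off-diagonal block
          baij->B, with the global column translated through colmap once
          the matrix has been assembled;
       3) row not local: stashed, and communicated to the owning processor
          in MatAssemblyBegin()/MatAssemblyEnd().
  */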
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig = baij->rstart_bs;
  int         rend_orig = baij->rend_bs,cstart_orig = baij->cstart_bs;
  int         cend_orig = baij->cend_bs,bs = baij->bs;

  /* Some variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  Scalar      *aa = a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
  Scalar      *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2 = a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray = baij->barray;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart = baij->rstart;
  int         rend = baij->rend,cstart = baij->cstart,stepval;
  int         cend = baij->cend,bs = baij->bs,bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for ( jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(USE_PETSC_BOPT_g)
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
            col = (baij->colmap[in[j]] - 1)/bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          row = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval ) {
              for ( row=im[i]*bs,l=0; l<bs; l++,row++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#include <math.h>
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig = baij->rstart_bs;
  int         rend_orig = baij->rend_bs,Nbs = baij->Nbs;
  int         total_ct = baij->ht_total_ct,insert_ct = baij->ht_insert_ct;
  int         h1,key,size = baij->ht_size,bs = baij->bs,*HT = baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx] + (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart = baij->rstart;
  int         rend = baij->rend,stepval,bs = baij->bs,bs2 = baij->bs2;
  int         total_ct = baij->ht_total_ct,insert_ct = baij->ht_insert_ct;
  int         h1,key,size = baij->ht_size,idx,*HT = baij->ht,Nbs = baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++ );
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++ );
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval ) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval ) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          row = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval ) {
              for ( row=im[i]*bs,l=0; l<bs; l++,row++ ) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs = baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          if ((baij->colmap[idxn[j]/bs]-1 < 0) ||
              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
            ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2 = baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for ( i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for ( i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size, *owners = baij->rowners,bs = baij->bs;
  int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index, global column index, value); we store the global
     indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
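       As a concrete illustration of this packing: an entry destined for
       global row 7, global column 12 with value 3.0 is sent as the three
       Scalars (7.0, 12.0, 3.0); the receiver converts the first two back
       to integer indices in MatAssemblyEnd_MPIBAIJ().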
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}

/*
   Creates the hash table, and sets the table.
   This table is created only once.
   If new entries need to be added to the matrix,
   then the hash table has to be destroyed and
   recreated.
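   The table uses multiplicative hashing with the golden-ratio constant
   HASH_KEY = 0.6180339887: a block entry at block row r and global block
   column c is keyed as key = r*Nbs + c + 1 and hashed to
   (int)(size*frac(key*HASH_KEY)); collisions are resolved by linear
   probing, as in the search loops of MatSetValues_MPIBAIJ_HT() above.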
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz = a->nz+b->nz,h1,*ai = a->i,*aj = a->j,*bi = b->i,*bj = b->j;
  int         size,ct = 0,max = 0,bs2 = baij->bs2,rstart = baij->rstart;
  int         cstart = baij->cstart,*garray = baij->garray,row,col,Nbs = baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for the Hash Table */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));

  /* Loop Over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(USE_PETSC_BOPT_g)
  for ( i=0,j=0; i<size; i++ ) {
    if (HT[i]) {j++;}
  }
  PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
           (j == 0) ? 0.0 : ((double)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
  int         bs = baij->bs,row,col,other_disassembled,flg;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled) {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(baij->recv_waits); PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(baij->send_waits); PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
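     (mat->was_assembled is 0 or 1 on each processor, so the MPI_PROD
     reduction below yields 1 only if every processor is still assembled;
     a single disassembled processor forces all to disassemble.)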
  */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
  if (flg && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    double fact = 1.39;
    ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Hash table Factor used %5.2f\n",fact);
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr);
    mat->ops.setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,rank,bs = baij->bs;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm,"  block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
            baij->cstart*bs,baij->cend*bs);
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = baij->size;
    rank = baij->rank;
    if (size == 1) {
      ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqBAIJ *Aloc;
      int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
      int         mbs = baij->mbs;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqBAIJ*) baij->A->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = baij->rstart;
      rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = (baij->cstart+aj[j])*bs;
          for ( k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      /* copy over the B part */
      Aloc = (Mat_SeqBAIJ*) baij->B->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = baij->rstart*bs;
      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = baij->garray[aj[j]]*bs;
          for ( k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      PetscFree(rvals);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(PetscObject obj)
{
  Mat         mat = (Mat) obj;
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;
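  /*
     Teardown sketch: the parallel matrix owns two sequential BAIJ matrices
     (the diagonal block A and the off-diagonal block B), the scatter
     context Mvctx with its work vector lvec, and several lazily created
     arrays (colmap, garray, rowvalues, barray, and the hash table hd/ht);
     the NULL checks below reflect that lazy creation.
  */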
  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  if (baij->colmap) PetscFree(baij->colmap);
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec) VecDestroy(baij->lvec);
  if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr, nt;

  PetscFunctionBegin;
  VecGetLocalSize_Fast(xx,nt);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  VecGetLocalSize_Fast(yy,nt);
  if (nt != a->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIBAIJ"
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
  Mat         A = a->A, B = a->B;
  int         ierr;
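  /*
     The five statistics (nz_used, nz_allocated, nz_unneeded, memory,
     mallocs) from the diagonal and off-diagonal blocks are summed into
     isend[]; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM they are then combined
     across processors with a single MPI_Allreduce into irecv[].
  */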
double isend[5], irecv[5]; 1412 1413 PetscFunctionBegin; 1414 info->block_size = (double)a->bs; 1415 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1416 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1417 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1418 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1419 if (flag == MAT_LOCAL) { 1420 info->nz_used = isend[0]; 1421 info->nz_allocated = isend[1]; 1422 info->nz_unneeded = isend[2]; 1423 info->memory = isend[3]; 1424 info->mallocs = isend[4]; 1425 } else if (flag == MAT_GLOBAL_MAX) { 1426 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1427 info->nz_used = irecv[0]; 1428 info->nz_allocated = irecv[1]; 1429 info->nz_unneeded = irecv[2]; 1430 info->memory = irecv[3]; 1431 info->mallocs = irecv[4]; 1432 } else if (flag == MAT_GLOBAL_SUM) { 1433 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1434 info->nz_used = irecv[0]; 1435 info->nz_allocated = irecv[1]; 1436 info->nz_unneeded = irecv[2]; 1437 info->memory = irecv[3]; 1438 info->mallocs = irecv[4]; 1439 } 1440 info->rows_global = (double)a->M; 1441 info->columns_global = (double)a->N; 1442 info->rows_local = (double)a->m; 1443 info->columns_local = (double)a->N; 1444 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1445 info->fill_ratio_needed = 0; 1446 info->factor_mallocs = 0; 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNC__ 1451 #define __FUNC__ "MatSetOption_MPIBAIJ" 1452 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1453 { 1454 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1455 1456 PetscFunctionBegin; 1457 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1458 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1459 op == MAT_COLUMNS_UNSORTED || 1460 op == MAT_COLUMNS_SORTED || 1461 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1462 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1463 MatSetOption(a->A,op); 1464 MatSetOption(a->B,op); 1465 } else if (op == MAT_ROW_ORIENTED) { 1466 a->roworiented = 1; 1467 MatSetOption(a->A,op); 1468 MatSetOption(a->B,op); 1469 } else if (op == MAT_ROWS_SORTED || 1470 op == MAT_ROWS_UNSORTED || 1471 op == MAT_SYMMETRIC || 1472 op == MAT_STRUCTURALLY_SYMMETRIC || 1473 op == MAT_YES_NEW_DIAGONALS) 1474 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1475 else if (op == MAT_COLUMN_ORIENTED) { 1476 a->roworiented = 0; 1477 MatSetOption(a->A,op); 1478 MatSetOption(a->B,op); 1479 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1480 a->donotstash = 1; 1481 } else if (op == MAT_NO_NEW_DIAGONALS) { 1482 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1483 } else { 1484 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNC__ 1490 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1491 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1492 { 1493 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1494 Mat_SeqBAIJ *Aloc; 1495 Mat B; 1496 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1497 int bs=baij->bs,mbs=baij->mbs; 1498 Scalar *a; 1499 1500 PetscFunctionBegin; 1501 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1502 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1503 CHKERRQ(ierr); 1504 1505 /* copy over the A part */ 1506 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1507 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1508 row = baij->rstart; 1509 rvals = (int *) 
PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1510 1511 for ( i=0; i<mbs; i++ ) { 1512 rvals[0] = bs*(baij->rstart + i); 1513 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1514 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1515 col = (baij->cstart+aj[j])*bs; 1516 for (k=0; k<bs; k++ ) { 1517 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1518 col++; a += bs; 1519 } 1520 } 1521 } 1522 /* copy over the B part */ 1523 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1524 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1525 row = baij->rstart*bs; 1526 for ( i=0; i<mbs; i++ ) { 1527 rvals[0] = bs*(baij->rstart + i); 1528 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1529 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1530 col = baij->garray[aj[j]]*bs; 1531 for (k=0; k<bs; k++ ) { 1532 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1533 col++; a += bs; 1534 } 1535 } 1536 } 1537 PetscFree(rvals); 1538 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1539 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1540 1541 if (matout != PETSC_NULL) { 1542 *matout = B; 1543 } else { 1544 /* This isn't really an in-place transpose .... but free data structures from baij */ 1545 PetscFree(baij->rowners); 1546 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1547 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1548 if (baij->colmap) PetscFree(baij->colmap); 1549 if (baij->garray) PetscFree(baij->garray); 1550 if (baij->lvec) VecDestroy(baij->lvec); 1551 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1552 PetscFree(baij); 1553 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1554 PetscHeaderDestroy(B); 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNC__ 1560 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1561 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1562 { 1563 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1564 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1565 int ierr,s1,s2,s3; 1566 1567 PetscFunctionBegin; 1568 if (ll) { 1569 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1570 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1571 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1572 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1573 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1574 } 1575 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNC__ 1580 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1581 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1582 { 1583 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1584 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1585 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1586 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1587 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1588 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1589 MPI_Comm comm = A->comm; 1590 MPI_Request *send_waits,*recv_waits; 1591 MPI_Status recv_status,*send_status; 1592 IS istmp; 1593 1594 PetscFunctionBegin; 1595 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1596 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1597 1598 /* first count number of contributors to each processor */ 1599 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1600 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1601 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1602 for ( i=0; i<N; i++ ) { 1603 idx = rows[i]; 1604 found = 0; 1605 for ( j=0; j<size; j++ ) { 1606 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 
1607 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1608 } 1609 } 1610 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1611 } 1612 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1613 1614 /* inform other processors of number of messages and max length*/ 1615 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1616 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1617 nrecvs = work[rank]; 1618 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1619 nmax = work[rank]; 1620 PetscFree(work); 1621 1622 /* post receives: */ 1623 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1624 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1625 for ( i=0; i<nrecvs; i++ ) { 1626 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1627 } 1628 1629 /* do sends: 1630 1) starts[i] gives the starting index in svalues for stuff going to 1631 the ith processor 1632 */ 1633 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1634 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1635 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1636 starts[0] = 0; 1637 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1638 for ( i=0; i<N; i++ ) { 1639 svalues[starts[owner[i]]++] = rows[i]; 1640 } 1641 ISRestoreIndices(is,&rows); 1642 1643 starts[0] = 0; 1644 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1645 count = 0; 1646 for ( i=0; i<size; i++ ) { 1647 if (procs[i]) { 1648 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1649 } 1650 } 1651 PetscFree(starts); 1652 1653 base = owners[rank]*bs; 1654 1655 /* wait on receives */ 1656 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1657 source = lens + nrecvs; 1658 count = nrecvs; slen = 0; 1659 while (count) { 1660 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1661 /* unpack receives into our local space */ 1662 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1663 source[imdex] = recv_status.MPI_SOURCE; 1664 lens[imdex] = n; 1665 slen += n; 1666 count--; 1667 } 1668 PetscFree(recv_waits); 1669 1670 /* move the data into the send scatter */ 1671 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1672 count = 0; 1673 for ( i=0; i<nrecvs; i++ ) { 1674 values = rvalues + i*nmax; 1675 for ( j=0; j<lens[i]; j++ ) { 1676 lrows[count++] = values[j] - base; 1677 } 1678 } 1679 PetscFree(rvalues); PetscFree(lens); 1680 PetscFree(owner); PetscFree(nprocs); 1681 1682 /* actually zap the local rows */ 1683 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1684 PLogObjectParent(A,istmp); 1685 1686 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1687 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1688 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1689 1690 if (diag) { 1691 for ( i = 0; i < slen; i++ ) { 1692 row = lrows[i] + rstart_bs; 1693 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1694 } 1695 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1696 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1697 } 1698 PetscFree(lrows); 1699 1700 /* wait on sends */ 1701 if (nsends) { 1702 send_status = (MPI_Status *) 
  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

extern int MatPrintHelp_SeqBAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
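
/*
   Note: the initializer above fills struct _MatOps positionally; a 0 entry
   marks an operation that the MPIBAIJ format does not implement.
*/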

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance the user
   should preallocate the matrix storage by setting the parameters d_nz
   (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the
       local submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block
       rows of the diagonal portion of the local submatrix (possibly different
       for each block row) or PETSC_NULL.  You must leave room for the diagonal
       entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of
       the local submatrix (same for all local rows).
.  o_nnz - array containing the number of block nonzeros in the various block
       rows of the off-diagonal portion of the local submatrix (possibly
       different for each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
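
   Example:
   The call below (all sizes purely illustrative) creates a 100 x 100 matrix
   with block size 2, letting PETSc compute the local sizes, with a rough
   preallocation of 5 diagonal and 2 off-diagonal block nonzeros per block row.

$     Mat A;
$     int ierr;
$     ierr = MatCreateMPIBAIJ(MPI_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,
$                             100,100,5,PETSC_NULL,2,PETSC_NULL,&A);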

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));

  B->destroy   = MatDestroy_MPIBAIJ;
  B->view      = MatView_MPIBAIJ;
  B->mapping   = 0;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work,sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }
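
  /* m,n,M,N are now fully resolved; record both the point dimensions and the
     block counts (mbs,nbs,Mbs,Nbs) in the matrix context */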
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  PetscFunctionReturn(0);
}
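
/*
   MatConvertSameType_MPIBAIJ() implements the MatConvert() entry in the
   MatOps table above for the same-type case: it builds a new MATMPIBAIJ
   matrix with the same parallel layout as matin, duplicating the
   communication structures (lvec, Mvctx) and the local A and B blocks.
*/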

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr,len=0,flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIBAIJ;
  mat->view      = MatView_MPIBAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend   = oldmat->cend;
  a->size   = oldmat->size;
  a->rank   = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i,nz,ierr,j,rstart,rend,fd;
  Scalar     *vals,*buf;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int        *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int        flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
  int        *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int        dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
  bs2  = bs*bs;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
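    /* rank 0 scatters each processor's slice of the per-row lengths;
       browners[] doubles as the displacement array since it is the prefix
       sum of the send counts in point rows */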
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j<rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { mycols[nz+i] = M+i; }

    /* read in the parts for all the other processors (except the last one)
       and ship them off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  dlens   = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens  = dlens + (rend-rstart);
  mask    = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else                             masked1[dcount++]  = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ )  mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
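  /* MAT_COLUMNS_SORTED promises MatSetValues() that the column indices in
     each insertion are already in ascending order (which the PETSc binary
     format is expected to provide), letting it skip its search step */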
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for the other processors (except the last one)
       and ship them out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
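
/*
   Example: loading a MATMPIBAIJ matrix from a PETSc binary file.  A minimal
   sketch; the file name "matrix.dat" is illustrative, and the viewer calls
   assume the binary viewer interface of this PETSc version.  The block size
   is selected at load time with the -matload_block_size option.

     Viewer viewer;
     Mat    A;
     int    ierr;

     ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);
     ierr = ViewerDestroy(viewer);
*/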