1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.101 1998/01/27 23:43:07 balay Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNC__ 380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 384 int ierr,i,j,row,col; 385 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 386 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 387 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 388 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 389 double tmp; 390 Scalar ** HD = baij->hd,value; 391 392 393 PetscFunctionBegin; 394 395 for ( i=0; i<m; i++ ) { 396 #if defined(USE_PETSC_BOPT_g) 397 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 398 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 399 #endif 400 row = im[i]; 401 if (row >= rstart_orig && row < rend_orig) { 402 for ( j=0; j<n; j++ ) { 403 col = in[j]; 404 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 405 /* Look up into the Hash Table */ 406 key = (row/bs)*Nbs+(col/bs)+1; 407 h1 = HASH(size,key,tmp); 408 409 410 idx = h1; 411 #if defined(USE_PETSC_BOPT_g) 412 insert_ct++; 413 total_ct++; 414 if (HT[idx] != key) { 415 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 416 if (idx == size) { 417 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 418 if (idx == h1) { 419 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 420 } 421 } 422 } 423 #else 424 if (HT[idx] != key) { 425 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 426 if (idx == size) { 427 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 428 if (idx == h1) { 429 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 430 } 431 } 432 } 433 #endif 434 /* A HASH table entry is found, so insert the values at the correct address */ 435 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 436 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 437 } 438 } else { 439 if (roworiented && !baij->donotstash) { 440 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 441 } else { 442 if (!baij->donotstash) { 443 row = im[i]; 444 for ( j=0; j<n; j++ ) { 445 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 446 } 447 } 448 } 449 } 450 } 451 #if defined(USE_PETSC_BOPT_g) 452 baij->ht_total_ct = total_ct; 453 baij->ht_insert_ct = insert_ct; 454 #endif 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNC__ 459 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 460 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 461 { 462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 463 int ierr,i,j,ii,jj,row,col,k,l; 464 int roworiented = baij->roworiented,rstart=baij->rstart ; 465 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 466 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 467 468 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 469 double tmp; 470 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 471 472 PetscFunctionBegin; 473 474 if (roworiented) { 475 stepval = (n-1)*bs; 476 } else { 477 stepval = (m-1)*bs; 478 } 479 for ( i=0; i<m; i++ ) { 480 #if defined(USE_PETSC_BOPT_g) 481 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 482 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 483 #endif 484 row = im[i]; 485 v_t = v + i*bs2; 486 if (row >= rstart && row < rend) { 487 for ( j=0; j<n; j++ ) { 488 col = in[j]; 489 490 /* Look up into the Hash Table */ 491 key = row*Nbs+col+1; 492 h1 = HASH(size,key,tmp); 493 494 idx = h1; 495 #if defined(USE_PETSC_BOPT_g) 496 total_ct++; 497 insert_ct++; 498 if (HT[idx] != key) { 499 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 500 if (idx == size) { 501 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 502 if (idx == h1) { 503 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 504 } 505 } 506 } 507 #else 508 if (HT[idx] != key) { 509 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 510 if (idx == size) { 511 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 512 if (idx == h1) { 513 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 514 } 515 } 516 } 517 #endif 518 baij_a = HD[idx]; 519 if (roworiented) { 520 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 521 /* value = v + (i*(stepval+bs)+j)*bs; */ 522 value = v_t; 523 v_t += bs; 524 if (addv == ADD_VALUES) { 525 for ( ii=0; ii<bs; ii++,value+=stepval) { 526 for ( jj=ii; jj<bs2; jj+=bs ) { 527 baij_a[jj] += *value++; 528 } 529 } 530 } else { 531 for ( ii=0; ii<bs; ii++,value+=stepval) { 532 for ( jj=ii; jj<bs2; jj+=bs ) { 533 baij_a[jj] = *value++; 534 } 535 } 536 } 537 } else { 538 value = v + j*(stepval+bs)*bs + i*bs; 539 if (addv == ADD_VALUES) { 540 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 541 for ( jj=0; jj<bs; jj++ ) { 542 baij_a[jj] += *value++; 543 } 544 } 545 } else { 546 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 547 for ( jj=0; jj<bs; jj++ ) { 548 baij_a[jj] = *value++; 549 } 550 } 551 } 552 } 553 } 554 } else { 555 if (!baij->donotstash) { 556 if (roworiented ) { 557 row = im[i]*bs; 558 value = v + i*(stepval+bs)*bs; 559 for ( j=0; j<bs; j++,row++ ) { 560 for ( k=0; k<n; k++ ) { 561 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 562 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 563 } 564 } 565 } 566 } else { 567 for ( j=0; j<n; j++ ) { 568 value = v + j*(stepval+bs)*bs + i*bs; 569 col = in[j]*bs; 570 for ( k=0; k<bs; k++,col++,value+=stepval) { 571 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 572 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 573 } 574 } 575 } 576 } 577 } 578 } 579 } 580 #if defined(USE_PETSC_BOPT_g) 581 baij->ht_total_ct = total_ct; 582 baij->ht_insert_ct = insert_ct; 583 #endif 584 PetscFunctionReturn(0); 585 } 586 #undef __FUNC__ 587 #define __FUNC__ "MatGetValues_MPIBAIJ" 588 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 589 { 590 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 591 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 592 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 593 594 for ( i=0; i<m; i++ ) { 595 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 596 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 597 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 598 row = idxm[i] - bsrstart; 599 for ( j=0; j<n; j++ ) { 600 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 601 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 602 if (idxn[j] >= bscstart && idxn[j] < bscend){ 603 col = idxn[j] - bscstart; 604 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 605 } else { 606 if (!baij->colmap) { 607 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 608 } 609 if((baij->colmap[idxn[j]/bs]-1 < 0) || 610 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611 else { 612 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 613 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 614 } 615 } 616 } 617 } else { 618 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNC__ 625 #define __FUNC__ "MatNorm_MPIBAIJ" 626 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 627 { 628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 629 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 630 int ierr, i,bs2=baij->bs2; 631 double sum = 0.0; 632 Scalar *v; 633 634 PetscFunctionBegin; 635 if (baij->size == 1) { 636 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 637 } else { 638 if (type == NORM_FROBENIUS) { 639 v = amat->a; 640 for (i=0; i<amat->nz*bs2; i++ ) { 641 #if defined(USE_PETSC_COMPLEX) 642 sum += real(conj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 for (i=0; i<bmat->nz*bs2; i++ ) { 649 #if defined(USE_PETSC_COMPLEX) 650 sum += real(conj(*v)*(*v)); v++; 651 #else 652 sum += (*v)*(*v); v++; 653 #endif 654 } 655 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 656 *norm = sqrt(*norm); 657 } else { 658 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 659 } 660 } 661 PetscFunctionReturn(0); 662 } 663 664 #undef __FUNC__ 665 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 666 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 669 MPI_Comm comm = mat->comm; 670 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 671 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 672 MPI_Request *send_waits,*recv_waits; 673 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 674 InsertMode addv; 675 Scalar *rvalues,*svalues; 676 677 PetscFunctionBegin; 678 /* make sure all processors are either in INSERTMODE or ADDMODE */ 679 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 680 if (addv == (ADD_VALUES|INSERT_VALUES)) { 681 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 682 } 683 mat->insertmode = addv; /* in case this processor had no cache */ 684 685 /* first count number of contributors to each processor */ 686 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 687 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 688 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 689 for ( i=0; i<baij->stash.n; i++ ) { 690 idx = baij->stash.idx[i]; 691 for ( j=0; j<size; j++ ) { 692 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 693 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 694 } 695 } 696 } 697 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 698 699 /* inform other processors of number of messages and max length*/ 700 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 701 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 702 nreceives = work[rank]; 703 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 704 nmax = work[rank]; 705 PetscFree(work); 706 707 /* post receives: 708 1) each message will consist of ordered pairs 709 (global index,value) we store the global index as a double 710 to simplify the message passing. 711 2) since we don't know how long each individual message is we 712 allocate the largest needed buffer for each receive. Potentially 713 this is a lot of wasted space. 714 715 716 This could be done better. 717 */ 718 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 719 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 720 for ( i=0; i<nreceives; i++ ) { 721 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 722 } 723 724 /* do sends: 725 1) starts[i] gives the starting index in svalues for stuff going to 726 the ith processor 727 */ 728 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 729 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 730 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 731 starts[0] = 0; 732 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 733 for ( i=0; i<baij->stash.n; i++ ) { 734 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 735 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 736 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 737 } 738 PetscFree(owner); 739 starts[0] = 0; 740 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 741 count = 0; 742 for ( i=0; i<size; i++ ) { 743 if (procs[i]) { 744 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 745 } 746 } 747 PetscFree(starts); PetscFree(nprocs); 748 749 /* Free cache space */ 750 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 751 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 752 753 baij->svalues = svalues; baij->rvalues = rvalues; 754 baij->nsends = nsends; baij->nrecvs = nreceives; 755 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 756 baij->rmax = nmax; 757 758 PetscFunctionReturn(0); 759 } 760 761 /* 762 Creates the hash table, and sets the table 763 This table is created only once. 764 If new entried need to be added to the matrix 765 then the hash table has to be destroyed and 766 recreated. 767 */ 768 #undef __FUNC__ 769 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 770 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 771 { 772 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 773 Mat A = baij->A, B=baij->B; 774 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 775 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 776 int size,ct=0,max=0,bs2=baij->bs2,rstart=baij->rstart; 777 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 778 int *HT,key; 779 Scalar **HD; 780 double tmp; 781 782 PetscFunctionBegin; 783 baij->ht_size=(int)(factor*nz); 784 size = baij->ht_size; 785 786 if (baij->ht) { 787 PetscFunctionReturn(0); 788 } 789 790 /* Allocate Memory for Hash Table */ 791 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 792 baij->ht = (int*)(baij->hd + size); 793 HD = baij->hd; 794 HT = baij->ht; 795 796 797 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 798 799 800 /* Loop Over A */ 801 for ( i=0; i<a->mbs; i++ ) { 802 for ( j=ai[i]; j<ai[i+1]; j++ ) { 803 row = i+rstart; 804 col = aj[j]+cstart; 805 806 key = row*Nbs + col + 1; 807 h1 = HASH(size,key,tmp); 808 for ( k=0; k<size; k++ ){ 809 if (HT[(h1+k)%size] == 0.0) { 810 HT[(h1+k)%size] = key; 811 HD[(h1+k)%size] = a->a + j*bs2; 812 break; 813 #if defined(USE_PETSC_BOPT_g) 814 } else { 815 ct++; 816 #endif 817 } 818 } 819 #if defined(USE_PETSC_BOPT_g) 820 if (k> max) max = k; 821 #endif 822 } 823 } 824 /* Loop Over B */ 825 for ( i=0; i<b->mbs; i++ ) { 826 for ( j=bi[i]; j<bi[i+1]; j++ ) { 827 row = i+rstart; 828 col = garray[bj[j]]; 829 key = row*Nbs + col + 1; 830 h1 = HASH(size,key,tmp); 831 for ( k=0; k<size; k++ ){ 832 if (HT[(h1+k)%size] == 0.0) { 833 HT[(h1+k)%size] = key; 834 HD[(h1+k)%size] = b->a + j*bs2; 835 break; 836 #if defined(USE_PETSC_BOPT_g) 837 } else { 838 ct++; 839 #endif 840 } 841 } 842 #if defined(USE_PETSC_BOPT_g) 843 if (k> max) max = k; 844 #endif 845 } 846 } 847 848 /* Print Summary */ 849 #if defined(USE_PETSC_BOPT_g) 850 for ( i=0,j=0; i<size; i++) 851 if (HT[i]) {j++;} 852 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 853 (j== 0)? 0.0:((double)(ct+j))/j,max); 854 #endif 855 PetscFunctionReturn(0); 856 } 857 858 #undef __FUNC__ 859 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 860 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 861 { 862 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 863 MPI_Status *send_status,recv_status; 864 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 865 int bs=baij->bs,row,col,other_disassembled,flg; 866 Scalar *values,val; 867 InsertMode addv = mat->insertmode; 868 869 PetscFunctionBegin; 870 /* wait on receives */ 871 while (count) { 872 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 873 /* unpack receives into our local space */ 874 values = baij->rvalues + 3*imdex*baij->rmax; 875 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 876 n = n/3; 877 for ( i=0; i<n; i++ ) { 878 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 879 col = (int) PetscReal(values[3*i+1]); 880 val = values[3*i+2]; 881 if (col >= baij->cstart*bs && col < baij->cend*bs) { 882 col -= baij->cstart*bs; 883 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 884 } else { 885 if (mat->was_assembled) { 886 if (!baij->colmap) { 887 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 888 } 889 col = (baij->colmap[col/bs]) - 1 + col%bs; 890 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 891 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 892 col = (int) PetscReal(values[3*i+1]); 893 } 894 } 895 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 896 } 897 } 898 count--; 899 } 900 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 901 902 /* wait on sends */ 903 if (baij->nsends) { 904 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 905 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 906 PetscFree(send_status); 907 } 908 PetscFree(baij->send_waits); PetscFree(baij->svalues); 909 910 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 911 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 912 913 /* determine if any processor has disassembled, if so we must 914 also disassemble ourselfs, in order that we may reassemble. */ 915 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 916 if (mat->was_assembled && !other_disassembled) { 917 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 918 } 919 920 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 921 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 922 } 923 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 924 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 925 926 #if defined(USE_PETSC_BOPT_g) 927 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 928 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 929 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 930 baij->ht_total_ct = 0; 931 baij->ht_insert_ct = 0; 932 } 933 #endif 934 ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 935 if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) { 936 double fact = 1.39; 937 ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr); 938 if (fact <= 1.0) fact = 1.39; 939 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Hash table Factor used %5.2f\n",fact); 940 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr); 941 mat->ops.setvalues = MatSetValues_MPIBAIJ_HT; 942 mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 943 } 944 945 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNC__ 950 #define __FUNC__ "MatView_MPIBAIJ_Binary" 951 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 952 { 953 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 954 int ierr; 955 956 PetscFunctionBegin; 957 if (baij->size == 1) { 958 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 959 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNC__ 964 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 965 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 966 { 967 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 968 int ierr, format,rank,bs = baij->bs; 969 FILE *fd; 970 ViewerType vtype; 971 972 PetscFunctionBegin; 973 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 974 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 975 ierr = ViewerGetFormat(viewer,&format); 976 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 977 MatInfo info; 978 MPI_Comm_rank(mat->comm,&rank); 979 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 980 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 981 PetscSequentialPhaseBegin(mat->comm,1); 982 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 983 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 984 baij->bs,(int)info.memory); 985 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 986 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 987 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 988 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 989 fflush(fd); 990 PetscSequentialPhaseEnd(mat->comm,1); 991 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 994 PetscPrintf(mat->comm," block size is %d\n",bs); 995 PetscFunctionReturn(0); 996 } 997 } 998 999 if (vtype == DRAW_VIEWER) { 1000 Draw draw; 1001 PetscTruth isnull; 1002 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 1003 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1004 } 1005 1006 if (vtype == ASCII_FILE_VIEWER) { 1007 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1008 PetscSequentialPhaseBegin(mat->comm,1); 1009 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 1010 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 1011 baij->cstart*bs,baij->cend*bs); 1012 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1013 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 1014 fflush(fd); 1015 PetscSequentialPhaseEnd(mat->comm,1); 1016 } else { 1017 int size = baij->size; 1018 rank = baij->rank; 1019 if (size == 1) { 1020 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1021 } else { 1022 /* assemble the entire matrix onto first processor. */ 1023 Mat A; 1024 Mat_SeqBAIJ *Aloc; 1025 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 1026 int mbs=baij->mbs; 1027 Scalar *a; 1028 1029 if (!rank) { 1030 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1031 } else { 1032 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1033 } 1034 PLogObjectParent(mat,A); 1035 1036 /* copy over the A part */ 1037 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 row = baij->rstart; 1040 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1041 1042 for ( i=0; i<mbs; i++ ) { 1043 rvals[0] = bs*(baij->rstart + i); 1044 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1045 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1046 col = (baij->cstart+aj[j])*bs; 1047 for (k=0; k<bs; k++ ) { 1048 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1049 col++; a += bs; 1050 } 1051 } 1052 } 1053 /* copy over the B part */ 1054 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1055 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1056 row = baij->rstart*bs; 1057 for ( i=0; i<mbs; i++ ) { 1058 rvals[0] = bs*(baij->rstart + i); 1059 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1060 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1061 col = baij->garray[aj[j]]*bs; 1062 for (k=0; k<bs; k++ ) { 1063 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1064 col++; a += bs; 1065 } 1066 } 1067 } 1068 PetscFree(rvals); 1069 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1070 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1071 /* 1072 Everyone has to call to draw the matrix since the graphics waits are 1073 synchronized across all processors that share the Draw object 1074 */ 1075 if (!rank || vtype == DRAW_VIEWER) { 1076 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1077 } 1078 ierr = MatDestroy(A); CHKERRQ(ierr); 1079 } 1080 } 1081 PetscFunctionReturn(0); 1082 } 1083 1084 1085 1086 #undef __FUNC__ 1087 #define __FUNC__ "MatView_MPIBAIJ" 1088 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 1089 { 1090 Mat mat = (Mat) obj; 1091 int ierr; 1092 ViewerType vtype; 1093 1094 PetscFunctionBegin; 1095 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1096 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1097 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1098 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1099 } else if (vtype == BINARY_FILE_VIEWER) { 1100 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1101 } 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNC__ 1106 #define __FUNC__ "MatDestroy_MPIBAIJ" 1107 int MatDestroy_MPIBAIJ(PetscObject obj) 1108 { 1109 Mat mat = (Mat) obj; 1110 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1111 int ierr; 1112 1113 PetscFunctionBegin; 1114 #if defined(USE_PETSC_LOG) 1115 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 1116 #endif 1117 1118 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1119 PetscFree(baij->rowners); 1120 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1121 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1122 if (baij->colmap) PetscFree(baij->colmap); 1123 if (baij->garray) PetscFree(baij->garray); 1124 if (baij->lvec) VecDestroy(baij->lvec); 1125 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1126 if (baij->rowvalues) PetscFree(baij->rowvalues); 1127 if (baij->barray) PetscFree(baij->barray); 1128 if (baij->hd) PetscFree(baij->hd); 1129 PetscFree(baij); 1130 PLogObjectDestroy(mat); 1131 PetscHeaderDestroy(mat); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNC__ 1136 #define __FUNC__ "MatMult_MPIBAIJ" 1137 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1138 { 1139 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1140 int ierr, nt; 1141 1142 PetscFunctionBegin; 1143 VecGetLocalSize_Fast(xx,nt); 1144 if (nt != a->n) { 1145 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1146 } 1147 VecGetLocalSize_Fast(yy,nt); 1148 if (nt != a->m) { 1149 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1150 } 1151 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1152 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 1153 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1154 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1155 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1161 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1162 { 1163 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1164 int ierr; 1165 1166 PetscFunctionBegin; 1167 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1168 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1169 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1170 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNC__ 1175 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1176 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1177 { 1178 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1179 int ierr; 1180 1181 PetscFunctionBegin; 1182 /* do nondiagonal part */ 1183 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1184 /* send it on its way */ 1185 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1186 /* do local part */ 1187 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1188 /* receive remote parts: note this assumes the values are not actually */ 1189 /* inserted in yy until the next line, which is true for my implementation*/ 1190 /* but is not perhaps always true. */ 1191 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNC__ 1196 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1197 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1198 { 1199 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1200 int ierr; 1201 1202 PetscFunctionBegin; 1203 /* do nondiagonal part */ 1204 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1205 /* send it on its way */ 1206 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1207 /* do local part */ 1208 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1209 /* receive remote parts: note this assumes the values are not actually */ 1210 /* inserted in yy until the next line, which is true for my implementation*/ 1211 /* but is not perhaps always true. */ 1212 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* 1217 This only works correctly for square matrices where the subblock A->A is the 1218 diagonal block 1219 */ 1220 #undef __FUNC__ 1221 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1222 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1225 int ierr; 1226 1227 PetscFunctionBegin; 1228 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1229 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNC__ 1234 #define __FUNC__ "MatScale_MPIBAIJ" 1235 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1236 { 1237 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1238 int ierr; 1239 1240 PetscFunctionBegin; 1241 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1242 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNC__ 1247 #define __FUNC__ "MatGetSize_MPIBAIJ" 1248 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1249 { 1250 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1251 1252 PetscFunctionBegin; 1253 if (m) *m = mat->M; 1254 if (n) *n = mat->N; 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNC__ 1259 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1260 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1261 { 1262 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1263 1264 PetscFunctionBegin; 1265 *m = mat->m; *n = mat->N; 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNC__ 1270 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1271 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1272 { 1273 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1274 1275 PetscFunctionBegin; 1276 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1277 PetscFunctionReturn(0); 1278 } 1279 1280 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1281 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1282 1283 #undef __FUNC__ 1284 #define __FUNC__ "MatGetRow_MPIBAIJ" 1285 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1286 { 1287 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1288 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1289 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1290 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1291 int *cmap, *idx_p,cstart = mat->cstart; 1292 1293 PetscFunctionBegin; 1294 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1295 mat->getrowactive = PETSC_TRUE; 1296 1297 if (!mat->rowvalues && (idx || v)) { 1298 /* 1299 allocate enough space to hold information from the longest row. 1300 */ 1301 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1302 int max = 1,mbs = mat->mbs,tmp; 1303 for ( i=0; i<mbs; i++ ) { 1304 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1305 if (max < tmp) { max = tmp; } 1306 } 1307 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1308 CHKPTRQ(mat->rowvalues); 1309 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1310 } 1311 1312 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1313 lrow = row - brstart; 1314 1315 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1316 if (!v) {pvA = 0; pvB = 0;} 1317 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1318 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1319 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1320 nztot = nzA + nzB; 1321 1322 cmap = mat->garray; 1323 if (v || idx) { 1324 if (nztot) { 1325 /* Sort by increasing column numbers, assuming A and B already sorted */ 1326 int imark = -1; 1327 if (v) { 1328 *v = v_p = mat->rowvalues; 1329 for ( i=0; i<nzB; i++ ) { 1330 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1331 else break; 1332 } 1333 imark = i; 1334 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1335 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1336 } 1337 if (idx) { 1338 *idx = idx_p = mat->rowindices; 1339 if (imark > -1) { 1340 for ( i=0; i<imark; i++ ) { 1341 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1342 } 1343 } else { 1344 for ( i=0; i<nzB; i++ ) { 1345 if (cmap[cworkB[i]/bs] < cstart) 1346 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1347 else break; 1348 } 1349 imark = i; 1350 } 1351 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1352 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1353 } 1354 } else { 1355 if (idx) *idx = 0; 1356 if (v) *v = 0; 1357 } 1358 } 1359 *nz = nztot; 1360 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1361 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNC__ 1366 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1367 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1368 { 1369 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1370 1371 PetscFunctionBegin; 1372 if (baij->getrowactive == PETSC_FALSE) { 1373 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1374 } 1375 baij->getrowactive = PETSC_FALSE; 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNC__ 1380 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1381 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1382 { 1383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1384 1385 PetscFunctionBegin; 1386 *bs = baij->bs; 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #undef __FUNC__ 1391 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1392 int MatZeroEntries_MPIBAIJ(Mat A) 1393 { 1394 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1395 int ierr; 1396 1397 PetscFunctionBegin; 1398 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1399 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNC__ 1404 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1405 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1406 { 1407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1408 Mat A = a->A, B = a->B; 1409 int ierr; 1410 double isend[5], irecv[5]; 1411 1412 PetscFunctionBegin; 1413 info->block_size = (double)a->bs; 1414 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1415 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1416 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1417 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1418 if (flag == MAT_LOCAL) { 1419 info->nz_used = isend[0]; 1420 info->nz_allocated = isend[1]; 1421 info->nz_unneeded = isend[2]; 1422 info->memory = isend[3]; 1423 info->mallocs = isend[4]; 1424 } else if (flag == MAT_GLOBAL_MAX) { 1425 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1426 info->nz_used = irecv[0]; 1427 info->nz_allocated = irecv[1]; 1428 info->nz_unneeded = irecv[2]; 1429 info->memory = irecv[3]; 1430 info->mallocs = irecv[4]; 1431 } else if (flag == MAT_GLOBAL_SUM) { 1432 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1433 info->nz_used = irecv[0]; 1434 info->nz_allocated = irecv[1]; 1435 info->nz_unneeded = irecv[2]; 1436 info->memory = irecv[3]; 1437 info->mallocs = irecv[4]; 1438 } 1439 info->rows_global = (double)a->M; 1440 info->columns_global = (double)a->N; 1441 info->rows_local = (double)a->m; 1442 info->columns_local = (double)a->N; 1443 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1444 info->fill_ratio_needed = 0; 1445 info->factor_mallocs = 0; 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNC__ 1450 #define __FUNC__ "MatSetOption_MPIBAIJ" 1451 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1452 { 1453 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1454 1455 PetscFunctionBegin; 1456 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1457 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1458 op == MAT_COLUMNS_UNSORTED || 1459 op == MAT_COLUMNS_SORTED || 1460 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1461 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1462 MatSetOption(a->A,op); 1463 MatSetOption(a->B,op); 1464 } else if (op == MAT_ROW_ORIENTED) { 1465 a->roworiented = 1; 1466 MatSetOption(a->A,op); 1467 MatSetOption(a->B,op); 1468 } else if (op == MAT_ROWS_SORTED || 1469 op == MAT_ROWS_UNSORTED || 1470 op == MAT_SYMMETRIC || 1471 op == MAT_STRUCTURALLY_SYMMETRIC || 1472 op == MAT_YES_NEW_DIAGONALS) 1473 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1474 else if (op == MAT_COLUMN_ORIENTED) { 1475 a->roworiented = 0; 1476 MatSetOption(a->A,op); 1477 MatSetOption(a->B,op); 1478 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1479 a->donotstash = 1; 1480 } else if (op == MAT_NO_NEW_DIAGONALS) { 1481 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1482 } else { 1483 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNC__ 1489 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1490 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1491 { 1492 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1493 Mat_SeqBAIJ *Aloc; 1494 Mat B; 1495 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1496 int bs=baij->bs,mbs=baij->mbs; 1497 Scalar *a; 1498 1499 PetscFunctionBegin; 1500 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1501 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1502 CHKERRQ(ierr); 1503 1504 /* copy over the A part */ 1505 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1506 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1507 row = baij->rstart; 1508 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1509 1510 for ( i=0; i<mbs; i++ ) { 1511 rvals[0] = bs*(baij->rstart + i); 1512 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1513 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1514 col = (baij->cstart+aj[j])*bs; 1515 for (k=0; k<bs; k++ ) { 1516 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1517 col++; a += bs; 1518 } 1519 } 1520 } 1521 /* copy over the B part */ 1522 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1523 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1524 row = baij->rstart*bs; 1525 for ( i=0; i<mbs; i++ ) { 1526 rvals[0] = bs*(baij->rstart + i); 1527 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1528 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1529 col = baij->garray[aj[j]]*bs; 1530 for (k=0; k<bs; k++ ) { 1531 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1532 col++; a += bs; 1533 } 1534 } 1535 } 1536 PetscFree(rvals); 1537 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1538 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1539 1540 if (matout != PETSC_NULL) { 1541 *matout = B; 1542 } else { 1543 /* This isn't really an in-place transpose .... but free data structures from baij */ 1544 PetscFree(baij->rowners); 1545 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1546 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1547 if (baij->colmap) PetscFree(baij->colmap); 1548 if (baij->garray) PetscFree(baij->garray); 1549 if (baij->lvec) VecDestroy(baij->lvec); 1550 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1551 PetscFree(baij); 1552 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1553 PetscHeaderDestroy(B); 1554 } 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNC__ 1559 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1560 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1561 { 1562 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1563 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1564 int ierr,s1,s2,s3; 1565 1566 PetscFunctionBegin; 1567 if (ll) { 1568 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1569 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1570 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1571 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1572 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1573 } 1574 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNC__ 1579 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1580 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1581 { 1582 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1583 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1584 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1585 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1586 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1587 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1588 MPI_Comm comm = A->comm; 1589 MPI_Request *send_waits,*recv_waits; 1590 MPI_Status recv_status,*send_status; 1591 IS istmp; 1592 1593 PetscFunctionBegin; 1594 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1595 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1596 1597 /* first count number of contributors to each processor */ 1598 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1599 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1600 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1601 for ( i=0; i<N; i++ ) { 1602 idx = rows[i]; 1603 found = 0; 1604 for ( j=0; j<size; j++ ) { 1605 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1606 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1607 } 1608 } 1609 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1610 } 1611 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1612 1613 /* inform other processors of number of messages and max length*/ 1614 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1615 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1616 nrecvs = work[rank]; 1617 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1618 nmax = work[rank]; 1619 PetscFree(work); 1620 1621 /* post receives: */ 1622 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1623 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1624 for ( i=0; i<nrecvs; i++ ) { 1625 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1626 } 1627 1628 /* do sends: 1629 1) starts[i] gives the starting index in svalues for stuff going to 1630 the ith processor 1631 */ 1632 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1633 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1634 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1635 starts[0] = 0; 1636 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1637 for ( i=0; i<N; i++ ) { 1638 svalues[starts[owner[i]]++] = rows[i]; 1639 } 1640 ISRestoreIndices(is,&rows); 1641 1642 starts[0] = 0; 1643 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1644 count = 0; 1645 for ( i=0; i<size; i++ ) { 1646 if (procs[i]) { 1647 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1648 } 1649 } 1650 PetscFree(starts); 1651 1652 base = owners[rank]*bs; 1653 1654 /* wait on receives */ 1655 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1656 source = lens + nrecvs; 1657 count = nrecvs; slen = 0; 1658 while (count) { 1659 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1660 /* unpack receives into our local space */ 1661 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1662 source[imdex] = recv_status.MPI_SOURCE; 1663 lens[imdex] = n; 1664 slen += n; 1665 count--; 1666 } 1667 PetscFree(recv_waits); 1668 1669 /* move the data into the send scatter */ 1670 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1671 count = 0; 1672 for ( i=0; i<nrecvs; i++ ) { 1673 values = rvalues + i*nmax; 1674 for ( j=0; j<lens[i]; j++ ) { 1675 lrows[count++] = values[j] - base; 1676 } 1677 } 1678 PetscFree(rvalues); PetscFree(lens); 1679 PetscFree(owner); PetscFree(nprocs); 1680 1681 /* actually zap the local rows */ 1682 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1683 PLogObjectParent(A,istmp); 1684 1685 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1686 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1687 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1688 1689 if (diag) { 1690 for ( i = 0; i < slen; i++ ) { 1691 row = lrows[i] + rstart_bs; 1692 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1693 } 1694 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1695 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1696 } 1697 PetscFree(lrows); 1698 1699 /* wait on sends */ 1700 if (nsends) { 1701 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1703 PetscFree(send_status); 1704 } 1705 PetscFree(send_waits); PetscFree(svalues); 1706 1707 PetscFunctionReturn(0); 1708 } 1709 extern int MatPrintHelp_SeqBAIJ(Mat); 1710 #undef __FUNC__ 1711 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1712 int MatPrintHelp_MPIBAIJ(Mat A) 1713 { 1714 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1715 int ierr; 1716 1717 PetscFunctionBegin; 1718 if (!a->rank) { 1719 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNC__ 1725 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1726 int MatSetUnfactored_MPIBAIJ(Mat A) 1727 { 1728 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1729 int ierr; 1730 1731 PetscFunctionBegin; 1732 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1737 1738 /* -------------------------------------------------------------------*/ 1739 static struct _MatOps MatOps = { 1740 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1741 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1742 0,0,0,0, 1743 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1744 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1745 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1746 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1747 0,0,0,MatGetSize_MPIBAIJ, 1748 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1749 0,0,MatConvertSameType_MPIBAIJ,0,0, 1750 0,0,0,MatGetSubMatrices_MPIBAIJ, 1751 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1752 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1753 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1754 1755 1756 #undef __FUNC__ 1757 #define __FUNC__ "MatCreateMPIBAIJ" 1758 /*@C 1759 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1760 (block compressed row). For good matrix assembly performance 1761 the user should preallocate the matrix storage by setting the parameters 1762 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1763 performance can be increased by more than a factor of 50. 1764 1765 Input Parameters: 1766 . comm - MPI communicator 1767 . bs - size of blockk 1768 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1769 This value should be the same as the local size used in creating the 1770 y vector for the matrix-vector product y = Ax. 1771 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1772 This value should be the same as the local size used in creating the 1773 x vector for the matrix-vector product y = Ax. 1774 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1775 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1776 . d_nz - number of block nonzeros per block row in diagonal portion of local 1777 submatrix (same for all local rows) 1778 . d_nzz - array containing the number of block nonzeros in the various block rows 1779 of the in diagonal portion of the local (possibly different for each block 1780 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1781 it is zero. 1782 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1783 submatrix (same for all local rows). 1784 . o_nzz - array containing the number of nonzeros in the various block rows of the 1785 off-diagonal portion of the local submatrix (possibly different for 1786 each block row) or PETSC_NULL. 1787 1788 Output Parameter: 1789 . A - the matrix 1790 1791 Notes: 1792 The user MUST specify either the local or global matrix dimensions 1793 (possibly both). 1794 1795 Storage Information: 1796 For a square global matrix we define each processor's diagonal portion 1797 to be its local rows and the corresponding columns (a square submatrix); 1798 each processor's off-diagonal portion encompasses the remainder of the 1799 local matrix (a rectangular submatrix). 1800 1801 The user can specify preallocated storage for the diagonal part of 1802 the local submatrix with either d_nz or d_nnz (not both). Set 1803 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1804 memory allocation. Likewise, specify preallocated storage for the 1805 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1806 1807 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1808 the figure below we depict these three local rows and all columns (0-11). 1809 1810 $ 0 1 2 3 4 5 6 7 8 9 10 11 1811 $ ------------------- 1812 $ row 3 | o o o d d d o o o o o o 1813 $ row 4 | o o o d d d o o o o o o 1814 $ row 5 | o o o d d d o o o o o o 1815 $ ------------------- 1816 $ 1817 1818 Thus, any entries in the d locations are stored in the d (diagonal) 1819 submatrix, and any entries in the o locations are stored in the 1820 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1821 stored simply in the MATSEQBAIJ format for compressed row storage. 1822 1823 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1824 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1825 In general, for PDE problems in which most nonzeros are near the diagonal, 1826 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1827 or you will get TERRIBLE performance; see the users' manual chapter on 1828 matrices. 1829 1830 .keywords: matrix, block, aij, compressed row, sparse, parallel 1831 1832 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1833 @*/ 1834 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1835 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1836 { 1837 Mat B; 1838 Mat_MPIBAIJ *b; 1839 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1840 1841 PetscFunctionBegin; 1842 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1843 1844 MPI_Comm_size(comm,&size); 1845 if (size == 1) { 1846 if (M == PETSC_DECIDE) M = m; 1847 if (N == PETSC_DECIDE) N = n; 1848 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 *A = 0; 1853 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 1854 PLogObjectCreate(B); 1855 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1856 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1857 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1858 1859 B->destroy = MatDestroy_MPIBAIJ; 1860 B->view = MatView_MPIBAIJ; 1861 B->mapping = 0; 1862 B->factor = 0; 1863 B->assembled = PETSC_FALSE; 1864 1865 B->insertmode = NOT_SET_VALUES; 1866 MPI_Comm_rank(comm,&b->rank); 1867 MPI_Comm_size(comm,&b->size); 1868 1869 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1870 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1871 } 1872 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1873 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1874 } 1875 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1876 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1877 } 1878 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1879 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1880 1881 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1882 work[0] = m; work[1] = n; 1883 mbs = m/bs; nbs = n/bs; 1884 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1885 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1886 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1887 } 1888 if (m == PETSC_DECIDE) { 1889 Mbs = M/bs; 1890 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1891 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1892 m = mbs*bs; 1893 } 1894 if (n == PETSC_DECIDE) { 1895 Nbs = N/bs; 1896 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1897 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1898 n = nbs*bs; 1899 } 1900 if (mbs*bs != m || nbs*bs != n) { 1901 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1902 } 1903 1904 b->m = m; B->m = m; 1905 b->n = n; B->n = n; 1906 b->N = N; B->N = N; 1907 b->M = M; B->M = M; 1908 b->bs = bs; 1909 b->bs2 = bs*bs; 1910 b->mbs = mbs; 1911 b->nbs = nbs; 1912 b->Mbs = Mbs; 1913 b->Nbs = Nbs; 1914 1915 /* build local table of row and column ownerships */ 1916 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1917 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1918 b->cowners = b->rowners + b->size + 2; 1919 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1920 b->rowners[0] = 0; 1921 for ( i=2; i<=b->size; i++ ) { 1922 b->rowners[i] += b->rowners[i-1]; 1923 } 1924 b->rstart = b->rowners[b->rank]; 1925 b->rend = b->rowners[b->rank+1]; 1926 b->rstart_bs = b->rstart * bs; 1927 b->rend_bs = b->rend * bs; 1928 1929 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1930 b->cowners[0] = 0; 1931 for ( i=2; i<=b->size; i++ ) { 1932 b->cowners[i] += b->cowners[i-1]; 1933 } 1934 b->cstart = b->cowners[b->rank]; 1935 b->cend = b->cowners[b->rank+1]; 1936 b->cstart_bs = b->cstart * bs; 1937 b->cend_bs = b->cend * bs; 1938 1939 1940 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1941 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1942 PLogObjectParent(B,b->A); 1943 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1944 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1945 PLogObjectParent(B,b->B); 1946 1947 /* build cache for off array entries formed */ 1948 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1949 b->donotstash = 0; 1950 b->colmap = 0; 1951 b->garray = 0; 1952 b->roworiented = 1; 1953 1954 /* stuff used in block assembly */ 1955 b->barray = 0; 1956 1957 /* stuff used for matrix vector multiply */ 1958 b->lvec = 0; 1959 b->Mvctx = 0; 1960 1961 /* stuff for MatGetRow() */ 1962 b->rowindices = 0; 1963 b->rowvalues = 0; 1964 b->getrowactive = PETSC_FALSE; 1965 1966 /* hash table stuff */ 1967 b->ht = 0; 1968 b->hd = 0; 1969 b->ht_size = 0; 1970 b->ht_total_ct = 0; 1971 b->ht_insert_ct = 0; 1972 1973 *A = B; 1974 PetscFunctionReturn(0); 1975 } 1976 1977 #undef __FUNC__ 1978 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 1979 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 1980 { 1981 Mat mat; 1982 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 1983 int ierr, len=0, flg; 1984 1985 PetscFunctionBegin; 1986 *newmat = 0; 1987 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 1988 PLogObjectCreate(mat); 1989 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 1990 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1991 mat->destroy = MatDestroy_MPIBAIJ; 1992 mat->view = MatView_MPIBAIJ; 1993 mat->factor = matin->factor; 1994 mat->assembled = PETSC_TRUE; 1995 1996 a->m = mat->m = oldmat->m; 1997 a->n = mat->n = oldmat->n; 1998 a->M = mat->M = oldmat->M; 1999 a->N = mat->N = oldmat->N; 2000 2001 a->bs = oldmat->bs; 2002 a->bs2 = oldmat->bs2; 2003 a->mbs = oldmat->mbs; 2004 a->nbs = oldmat->nbs; 2005 a->Mbs = oldmat->Mbs; 2006 a->Nbs = oldmat->Nbs; 2007 2008 a->rstart = oldmat->rstart; 2009 a->rend = oldmat->rend; 2010 a->cstart = oldmat->cstart; 2011 a->cend = oldmat->cend; 2012 a->size = oldmat->size; 2013 a->rank = oldmat->rank; 2014 mat->insertmode = NOT_SET_VALUES; 2015 a->rowvalues = 0; 2016 a->getrowactive = PETSC_FALSE; 2017 a->barray = 0; 2018 2019 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2020 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2021 a->cowners = a->rowners + a->size + 2; 2022 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2023 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 2024 if (oldmat->colmap) { 2025 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2026 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2027 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 2028 } else a->colmap = 0; 2029 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2030 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 2031 PLogObjectMemory(mat,len*sizeof(int)); 2032 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2033 } else a->garray = 0; 2034 2035 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2036 PLogObjectParent(mat,a->lvec); 2037 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2038 PLogObjectParent(mat,a->Mvctx); 2039 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 2040 PLogObjectParent(mat,a->A); 2041 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 2042 PLogObjectParent(mat,a->B); 2043 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2044 if (flg) { 2045 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2046 } 2047 *newmat = mat; 2048 PetscFunctionReturn(0); 2049 } 2050 2051 #include "sys.h" 2052 2053 #undef __FUNC__ 2054 #define __FUNC__ "MatLoad_MPIBAIJ" 2055 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2056 { 2057 Mat A; 2058 int i, nz, ierr, j,rstart, rend, fd; 2059 Scalar *vals,*buf; 2060 MPI_Comm comm = ((PetscObject)viewer)->comm; 2061 MPI_Status status; 2062 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2063 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2064 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 2065 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2066 int dcount,kmax,k,nzcount,tmp; 2067 2068 PetscFunctionBegin; 2069 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2070 bs2 = bs*bs; 2071 2072 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2073 if (!rank) { 2074 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2075 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2076 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2077 if (header[3] < 0) { 2078 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2079 } 2080 } 2081 2082 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2083 M = header[1]; N = header[2]; 2084 2085 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2086 2087 /* 2088 This code adds extra rows to make sure the number of rows is 2089 divisible by the blocksize 2090 */ 2091 Mbs = M/bs; 2092 extra_rows = bs - M + bs*(Mbs); 2093 if (extra_rows == bs) extra_rows = 0; 2094 else Mbs++; 2095 if (extra_rows &&!rank) { 2096 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2097 } 2098 2099 /* determine ownership of all rows */ 2100 mbs = Mbs/size + ((Mbs % size) > rank); 2101 m = mbs * bs; 2102 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2103 browners = rowners + size + 1; 2104 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2105 rowners[0] = 0; 2106 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2107 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2108 rstart = rowners[rank]; 2109 rend = rowners[rank+1]; 2110 2111 /* distribute row lengths to all processors */ 2112 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2113 if (!rank) { 2114 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2115 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2116 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2117 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2118 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2119 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2120 PetscFree(sndcounts); 2121 } else { 2122 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2123 } 2124 2125 if (!rank) { 2126 /* calculate the number of nonzeros on each processor */ 2127 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2128 PetscMemzero(procsnz,size*sizeof(int)); 2129 for ( i=0; i<size; i++ ) { 2130 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2131 procsnz[i] += rowlengths[j]; 2132 } 2133 } 2134 PetscFree(rowlengths); 2135 2136 /* determine max buffer needed and allocate it */ 2137 maxnz = 0; 2138 for ( i=0; i<size; i++ ) { 2139 maxnz = PetscMax(maxnz,procsnz[i]); 2140 } 2141 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2142 2143 /* read in my part of the matrix column indices */ 2144 nz = procsnz[0]; 2145 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2146 mycols = ibuf; 2147 if (size == 1) nz -= extra_rows; 2148 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2149 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2150 2151 /* read in every ones (except the last) and ship off */ 2152 for ( i=1; i<size-1; i++ ) { 2153 nz = procsnz[i]; 2154 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2155 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2156 } 2157 /* read in the stuff for the last proc */ 2158 if ( size != 1 ) { 2159 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2160 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2161 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2162 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2163 } 2164 PetscFree(cols); 2165 } else { 2166 /* determine buffer space needed for message */ 2167 nz = 0; 2168 for ( i=0; i<m; i++ ) { 2169 nz += locrowlens[i]; 2170 } 2171 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2172 mycols = ibuf; 2173 /* receive message of column indices*/ 2174 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2175 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2176 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2177 } 2178 2179 /* loop over local rows, determining number of off diagonal entries */ 2180 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2181 odlens = dlens + (rend-rstart); 2182 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2183 PetscMemzero(mask,3*Mbs*sizeof(int)); 2184 masked1 = mask + Mbs; 2185 masked2 = masked1 + Mbs; 2186 rowcount = 0; nzcount = 0; 2187 for ( i=0; i<mbs; i++ ) { 2188 dcount = 0; 2189 odcount = 0; 2190 for ( j=0; j<bs; j++ ) { 2191 kmax = locrowlens[rowcount]; 2192 for ( k=0; k<kmax; k++ ) { 2193 tmp = mycols[nzcount++]/bs; 2194 if (!mask[tmp]) { 2195 mask[tmp] = 1; 2196 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2197 else masked1[dcount++] = tmp; 2198 } 2199 } 2200 rowcount++; 2201 } 2202 2203 dlens[i] = dcount; 2204 odlens[i] = odcount; 2205 2206 /* zero out the mask elements we set */ 2207 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2208 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2209 } 2210 2211 /* create our matrix */ 2212 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2213 CHKERRQ(ierr); 2214 A = *newmat; 2215 MatSetOption(A,MAT_COLUMNS_SORTED); 2216 2217 if (!rank) { 2218 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2219 /* read in my part of the matrix numerical values */ 2220 nz = procsnz[0]; 2221 vals = buf; 2222 mycols = ibuf; 2223 if (size == 1) nz -= extra_rows; 2224 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2225 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2226 2227 /* insert into matrix */ 2228 jj = rstart*bs; 2229 for ( i=0; i<m; i++ ) { 2230 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2231 mycols += locrowlens[i]; 2232 vals += locrowlens[i]; 2233 jj++; 2234 } 2235 /* read in other processors (except the last one) and ship out */ 2236 for ( i=1; i<size-1; i++ ) { 2237 nz = procsnz[i]; 2238 vals = buf; 2239 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2240 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2241 } 2242 /* the last proc */ 2243 if ( size != 1 ){ 2244 nz = procsnz[i] - extra_rows; 2245 vals = buf; 2246 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2247 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2248 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2249 } 2250 PetscFree(procsnz); 2251 } else { 2252 /* receive numeric values */ 2253 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2254 2255 /* receive message of values*/ 2256 vals = buf; 2257 mycols = ibuf; 2258 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2259 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2260 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2261 2262 /* insert into matrix */ 2263 jj = rstart*bs; 2264 for ( i=0; i<m; i++ ) { 2265 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2266 mycols += locrowlens[i]; 2267 vals += locrowlens[i]; 2268 jj++; 2269 } 2270 } 2271 PetscFree(locrowlens); 2272 PetscFree(buf); 2273 PetscFree(ibuf); 2274 PetscFree(rowners); 2275 PetscFree(dlens); 2276 PetscFree(mask); 2277 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2278 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2279 PetscFunctionReturn(0); 2280 } 2281 2282 2283