#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.102 1998/02/01 07:42:11 balay Exp balay $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/baij/mpi/mpibaij.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);

/*
   Local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix. This is done in a non-scalable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;

  PetscFunctionBegin;
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int)); CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
  PetscFunctionReturn(0);
}

#define CHUNKSIZE  10
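/*
   The following two macros insert one scalar value into the diagonal (A)
   or off-diagonal (B) SeqBAIJ part of the local matrix.  Each macro
   locates the block row, does a short binary search followed by a linear
   scan for the block column, and either updates the existing bs*bs block
   in place or shifts the later blocks up to open a new slot.  If the row
   is full, the matrix storage is enlarged by CHUNKSIZE extra blocks (a
   fresh malloc plus copy).  They are macros rather than calls to
   MatSetValues_SeqBAIJ() purely to avoid per-entry function call
   overhead in MatSetValues_MPIBAIJ().
*/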
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(a->a);  \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i] = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
} 

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE),ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(b->a);  \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
    } \
    if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \
    rp[_i] = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
} 
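/*
   For illustration (a sketch; the index variables are hypothetical): a
   typical call that is dispatched to MatSetValues_MPIBAIJ() below.  With
   row-oriented input, entry v[i*n+j] is destined for global row im[i] and
   global column in[j]; locally owned entries go directly into baij->A or
   baij->B, and everything else is stashed until assembly.

      Scalar v[4]  = {1.0,2.0,3.0,4.0};
      int    im[2] = {row0,row1},in[2] = {col0,col1};
      ierr = MatSetValues(mat,2,im,2,in,v,ADD_VALUES); CHKERRQ(ierr);
*/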
#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol; 
  int         low,high,t,ridx,cidx,bs2=a->bs2; 
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
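/*
   MatSetValuesBlocked_MPIBAIJ() below works on whole bs x bs blocks: im[]
   and in[] are block row/column indices.  Unless the input already
   contains a single contiguous block (n == 1 row oriented, or m == 1
   column oriented), each block is first gathered into the scratch array
   baij->barray, which is then handed to MatSetValuesBlocked_SeqBAIJ().
*/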
#undef __FUNC__  
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
  }

  if (roworiented) { 
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) { 
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++; 
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(USE_PETSC_BOPT_g)
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
            col = (baij->colmap[in[j]] - 1)/bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#include <math.h>
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
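/*
   HASH() is the classical multiplicative hash: the key is multiplied by
   the golden-ratio fraction 0.6180339887, the integer part is discarded,
   and the fractional part is scaled to a slot in [0,size).  For example
   (illustrative numbers only): key = 1000 gives 1000*0.6180339887 =
   618.0339887, fractional part .0339887, so with size = 100 the slot is
   (int)(100*.0339887) = 3.  Collisions are resolved by linear probing in
   the MatSetValues*_HT() routines below.
*/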
#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
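/*
   Both hash-table MatSetValues variants can only overwrite or add to
   entries that already have a slot in the table; a (row,col) pair that is
   not found raises an error instead of allocating a new nonzero.  The
   table is therefore built only after the sparsity pattern is complete
   (see MatCreateHashTable_MPIBAIJ_Private() and MatAssemblyEnd_MPIBAIJ()
   below).
*/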
#undef __FUNC__  
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) { 
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          } 
          if ((baij->colmap[idxn[j]/bs]-1 < 0) ||
              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
            ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          } 
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
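/*
   The Frobenius norm below is computed as the square root of the globally
   summed squares, ||A||_F = sqrt( sum_ij |a_ij|^2 ): each process
   accumulates the squares of its A and B parts and the partial sums are
   combined with MPI_Allreduce.
*/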
#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data,*bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
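/*
   MatAssemblyBegin ships the entries that were stashed by MatSetValues
   for off-processor rows to their owning processors: each process first
   learns (via two Allreduces) how many messages to expect and the
   maximum message length, posts the receives, and then sends its stashed
   entries; MatAssemblyEnd unpacks and inserts them locally.
*/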
#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size,*owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag,*owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];} 

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: 
       1) each message will consist of ordered triples 
     (global row,global column,value); we store the global indices
     as Scalars to simplify the message passing. 
       2) since we don't know how long each individual message is we 
     allocate the largest needed buffer for each receive. Potentially 
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0; 
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}

/*
   Creates and fills the hash table.
   This table is created only once. 
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
*/
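/*
   The table has factor*nz slots for nz local nonzero blocks, so any
   factor > 1.0 leaves empty slots that keep the linear-probe chains
   short (MatAssemblyEnd_MPIBAIJ uses 1.39 by default, tunable via the
   -use_hash option).
*/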
#undef __FUNC__  
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A,B = baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(USE_PETSC_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));

  /* Loop Over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(USE_PETSC_BOPT_g)
  for ( i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
           (j == 0) ? 0.0 : ((double)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs,count = nrecvs,i,n,ierr;
  int         bs=baij->bs,row,col,other_disassembled,flg;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr   = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n      = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled) {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col  = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(baij->recv_waits); PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(baij->send_waits); PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselves, in order that we may reassemble.
  */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
  if (flg && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    double fact = 1.39;
    ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Hash table Factor used %5.2f\n",fact);
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr);
    mat->ops.setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,format,rank,bs = baij->bs;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 
    ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm,"  block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
            baij->cstart*bs,baij->cend*bs);
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = baij->size;
    rank = baij->rank;
    if (size == 1) {
      ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqBAIJ *Aloc;
      int         M = baij->M,N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
      int         mbs = baij->mbs;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqBAIJ*) baij->A->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = baij->rstart;
      rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = (baij->cstart+aj[j])*bs;
          for (k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      } 
      /* copy over the B part */
      Aloc = (Mat_SeqBAIJ*) baij->B->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = baij->rstart*bs;
      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = baij->garray[aj[j]]*bs;
          for (k=0; k<bs; k++ ) { 
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      } 
      PetscFree(rvals);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /* 
         Everyone has to call MatView to draw the matrix since the graphics waits
         are synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(PetscObject obj)
{
  Mat         mat = (Mat) obj;
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
  PetscFree(baij->rowners); 
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  if (baij->colmap) PetscFree(baij->colmap);
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec) VecDestroy(baij->lvec);
  if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij); 
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
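/*
   Parallel matrix-vector product: the diagonal block A acts on the
   locally owned part of x, while the scatter Mvctx gathers the needed
   off-processor entries of x into lvec for the off-diagonal block B, so
   that y = A*x_local + B*x_ghost.  The scatter is started before the
   local multiply so that communication overlaps with computation.
*/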
#undef __FUNC__  
#define __FUNC__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  VecGetLocalSize_Fast(xx,nt);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  VecGetLocalSize_Fast(yy,nt);
  if (nt != a->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIBAIJ"
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the 
  diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetSize_MPIBAIJ"
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
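/*
   MatGetRow returns one global row assembled from the local A (diagonal)
   and B (off-diagonal) parts.  B stores compressed block column indices,
   so garray[] maps them back to global columns; the B entries that fall
   to the left of the diagonal block are emitted first, then the A
   entries, then the remaining B entries, so the returned column indices
   are increasing.
*/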
#undef __FUNC__  
#define __FUNC__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      } 
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  double      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (double)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used;  isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;   isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->rows_global       = (double)a->M;
  info->columns_global    = (double)a->N;
  info->rows_local        = (double)a->m;
  info->columns_local     = (double)a->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED || 
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else { 
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{ 
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  Scalar      *a;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*) baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  row = baij->rstart;
  rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  } 
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  row = baij->rstart*bs;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) { 
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  } 
  PetscFree(rvals);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from baij */
    PetscFree(baij->rowners); 
    ierr = MatDestroy(baij->A); CHKERRQ(ierr);
    ierr = MatDestroy(baij->B); CHKERRQ(ierr);
    if (baij->colmap) PetscFree(baij->colmap);
    if (baij->garray) PetscFree(baij->garray);
    if (baij->lvec) VecDestroy(baij->lvec);
    if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
    PetscFree(baij); 
    PetscMemcpy(A,B,sizeof(struct _p_Mat)); 
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
{
  Mat a = ((Mat_MPIBAIJ *) A->data)->A;
  Mat b = ((Mat_MPIBAIJ *) A->data)->B;
  int ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
    ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
    ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
    ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
  }
  if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
  PetscFunctionReturn(0);
}
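/*
   MatZeroRows is given global row numbers, so the requested rows are
   first routed to their owning processors with the same
   count/Irecv/Isend pattern used in MatAssemblyBegin_MPIBAIJ; each
   processor then zeros the rows it owns in its A and B parts and, if
   diag is provided, re-inserts the diagonal entries via MatSetValues.
*/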
#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIBAIJ"
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];} 

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0; 
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits); 

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag) {
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
extern int MatPrintHelp_SeqBAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
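/*
   Operation table for the MATMPIBAIJ format.  The entries follow the
   slot order of struct _MatOps; a 0 marks an operation that this format
   does not implement.
*/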
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance the user
   should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters
   accurately, performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the
       local submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block
       rows of the diagonal portion of the local submatrix (possibly different
       for each block row) or PETSC_NULL.  You must leave room for the diagonal
       entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of
       the local submatrix (same for all local rows).
.  o_nnz - array containing the number of block nonzeros in the various block
       rows of the off-diagonal portion of the local submatrix (possibly
       different for each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$             0 1 2 3 4 5 6 7 8 9 10 11
$            -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$            -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
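
   Example:
   A sketch of typical usage (error checking omitted): create a 100 x 100
   matrix with 2 x 2 blocks, let PETSc decide the local sizes, and
   preallocate 5 diagonal and 2 off-diagonal block nonzeros per block row:

$      Mat A;
$      MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,
$                       100,100,5,PETSC_NULL,2,PETSC_NULL,&A);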

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));

  B->destroy   = MatDestroy_MPIBAIJ;
  B->view      = MatView_MPIBAIJ;
  B->mapping   = 0;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;
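
  /*
     Both rowners[] and cowners[] now hold prefix sums of the gathered
     per-process block counts; e.g. with 3 processes owning 2, 3 and 2
     block rows, the Allgather fills in {0,2,3,2} and the partial sums
     turn it into {0,2,5,7}, so process 1 owns block rows [2,5).
  */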

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache (stash) for entries destined for other processors */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  PetscFunctionReturn(0);
}
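/*
   MatConvertSameType_MPIBAIJ duplicates an MPIBAIJ matrix: the block
   layout and ownership tables are copied directly, while the sequential
   diagonal (A) and off-diagonal (B) pieces are duplicated through
   MatConvert() with MATSAME.
*/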
#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIBAIJ;
  mat->view      = MatView_MPIBAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend   = oldmat->cend;
  a->size   = oldmat->size;
  a->rank   = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*buf;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int        *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int        flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
  int        *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int        dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
  bs2 = bs*bs;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }
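
  /*
     Example of the padding arithmetic: for M = 10 and bs = 4, Mbs = 10/4 = 2
     and extra_rows = 4 - 10 + 4*2 = 2, so two padded rows (each given a unit
     diagonal entry below) bring the row count up to 12 = 3*4 and Mbs becomes 3.
     When M is already divisible by bs, extra_rows computes to bs and is reset
     to 0.
  */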

  /* determine ownership of all rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j<rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { mycols[nz+i] = M+i; }

    /* read in everyone else's part (except the last proc's) and ship it off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1  = mask    + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
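  /*
     MAT_COLUMNS_SORTED indicates that the column indices passed to
     MatSetValues() below are sorted within each row (the binary file is
     assumed to store them in ascending order), which permits a faster
     insertion path.
  */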
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1) for ( i=0; i<extra_rows; i++ ) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for the other processors (except the last one) and ship them off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}