1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.92 1997/12/06 00:08:36 balay Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 377 378 #undef __FUNC__ 379 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 380 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 381 { 382 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 383 int ierr,i,j,row,col; 384 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 385 int rend_orig=baij->rend_bs; 386 387 int h1,key,size=baij->ht_size,k,bs=baij->bs; 388 double * HT = baij->ht; 389 Scalar ** HD = (Scalar **)(HT + size),value; 390 391 392 PetscFunctionBegin; 393 394 if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 if (im[i] >= rstart_orig && im[i] < rend_orig) { 402 row = im[i]; 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*baij->Nbs+(col/bs)+1; 408 h1 = HASH1(size,key); 409 410 for ( k=0; k<size; k++ ){ 411 if (HT[(h1+k)%size] == key) { 412 if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value; 413 else *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) = value; 414 break; 415 } 416 } 417 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found"); 418 } 419 } else { 420 if (roworiented && !baij->donotstash) { 421 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 422 } else { 423 if (!baij->donotstash) { 424 row = im[i]; 425 for ( j=0; j<n; j++ ) { 426 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 427 } 428 } 429 } 430 } 431 } 432 PetscFunctionReturn(0); 433 } 434 435 #undef __FUNC__ 436 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 437 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 438 { 439 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 440 int ierr,i,j,ii,jj,row,col,k,l; 441 int roworiented = baij->roworiented,rstart=baij->rstart ; 442 int rend=baij->rend,stepval,bs=baij->bs; 443 444 int h1,key,size=baij->ht_size; 445 double * HT = baij->ht; 446 Scalar ** HD = (Scalar **)(HT + size),*value,*baij_a; 447 448 449 if (roworiented) { 450 stepval = (n-1)*bs; 451 } else { 452 stepval = (m-1)*bs; 453 } 454 for ( i=0; i<m; i++ ) { 455 #if defined(USE_PETSC_BOPT_g) 456 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 457 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 458 #endif 459 if (im[i] >= rstart && im[i] < rend) { 460 row = im[i]; 461 for ( j=0; j<n; j++ ) { 462 col = in[j]; 463 464 /* Look up into the Hash Table */ 465 key = row*baij->Nbs+col+1; 466 h1 = HASH1(size,key); 467 468 for ( k=0; k<size; k++ ){ 469 if (HT[(h1+k)%size] == key) { 470 baij_a = HD[(h1+k)%size]; 471 472 if (roworiented) { 473 value = v + i*(stepval+bs)*bs + j*bs; 474 } else { 475 value = v + j*(stepval+bs)*bs + i*bs; 476 } 477 for ( ii=0; ii<bs; ii++,value+=stepval ) { 478 for (jj=0; jj<bs; jj++ ) { 479 if (addv == ADD_VALUES) *baij_a += *value; 480 else *baij_a = *value; 481 baij++; value++; 482 } 483 } 484 break; 485 } 486 } 487 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found"); 488 } 489 } else { 490 if (!baij->donotstash) { 491 if (roworiented ) { 492 row = im[i]*bs; 493 value = v + i*(stepval+bs)*bs; 494 for ( j=0; j<bs; j++,row++ ) { 495 for ( k=0; k<n; k++ ) { 496 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 497 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 498 } 499 } 500 } 501 } else { 502 for ( j=0; j<n; j++ ) { 503 value = v + j*(stepval+bs)*bs + i*bs; 504 col = in[j]*bs; 505 for ( k=0; k<bs; k++,col++,value+=stepval) { 506 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 507 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 508 } 509 } 510 } 511 } 512 } 513 } 514 } 515 PetscFunctionReturn(0); 516 } 517 #undef __FUNC__ 518 #define __FUNC__ "MatGetValues_MPIBAIJ" 519 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 520 { 521 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 522 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 523 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 524 525 for ( i=0; i<m; i++ ) { 526 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 527 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 528 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 529 row = idxm[i] - bsrstart; 530 for ( j=0; j<n; j++ ) { 531 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 532 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 533 if (idxn[j] >= bscstart && idxn[j] < bscend){ 534 col = idxn[j] - bscstart; 535 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 536 } else { 537 if (!baij->colmap) { 538 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 if((baij->colmap[idxn[j]/bs]-1 < 0) || 541 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 542 else { 543 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 544 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 545 } 546 } 547 } 548 } else { 549 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNC__ 556 #define __FUNC__ "MatNorm_MPIBAIJ" 557 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 560 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 561 int ierr, i,bs2=baij->bs2; 562 double sum = 0.0; 563 Scalar *v; 564 565 PetscFunctionBegin; 566 if (baij->size == 1) { 567 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 568 } else { 569 if (type == NORM_FROBENIUS) { 570 v = amat->a; 571 for (i=0; i<amat->nz*bs2; i++ ) { 572 #if defined(USE_PETSC_COMPLEX) 573 sum += real(conj(*v)*(*v)); v++; 574 #else 575 sum += (*v)*(*v); v++; 576 #endif 577 } 578 v = bmat->a; 579 for (i=0; i<bmat->nz*bs2; i++ ) { 580 #if defined(USE_PETSC_COMPLEX) 581 sum += real(conj(*v)*(*v)); v++; 582 #else 583 sum += (*v)*(*v); v++; 584 #endif 585 } 586 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 587 *norm = sqrt(*norm); 588 } else { 589 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 590 } 591 } 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNC__ 596 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 597 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 598 { 599 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 600 MPI_Comm comm = mat->comm; 601 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 602 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 603 MPI_Request *send_waits,*recv_waits; 604 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 605 InsertMode addv; 606 Scalar *rvalues,*svalues; 607 608 PetscFunctionBegin; 609 /* make sure all processors are either in INSERTMODE or ADDMODE */ 610 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 611 if (addv == (ADD_VALUES|INSERT_VALUES)) { 612 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 613 } 614 mat->insertmode = addv; /* in case this processor had no cache */ 615 616 /* first count number of contributors to each processor */ 617 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 618 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 619 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 620 for ( i=0; i<baij->stash.n; i++ ) { 621 idx = baij->stash.idx[i]; 622 for ( j=0; j<size; j++ ) { 623 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 624 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 625 } 626 } 627 } 628 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 629 630 /* inform other processors of number of messages and max length*/ 631 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 632 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 633 nreceives = work[rank]; 634 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 635 nmax = work[rank]; 636 PetscFree(work); 637 638 /* post receives: 639 1) each message will consist of ordered pairs 640 (global index,value) we store the global index as a double 641 to simplify the message passing. 642 2) since we don't know how long each individual message is we 643 allocate the largest needed buffer for each receive. Potentially 644 this is a lot of wasted space. 645 646 647 This could be done better. 648 */ 649 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 650 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 651 for ( i=0; i<nreceives; i++ ) { 652 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 653 } 654 655 /* do sends: 656 1) starts[i] gives the starting index in svalues for stuff going to 657 the ith processor 658 */ 659 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 660 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 661 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 662 starts[0] = 0; 663 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 664 for ( i=0; i<baij->stash.n; i++ ) { 665 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 666 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 667 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 668 } 669 PetscFree(owner); 670 starts[0] = 0; 671 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 672 count = 0; 673 for ( i=0; i<size; i++ ) { 674 if (procs[i]) { 675 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 676 } 677 } 678 PetscFree(starts); PetscFree(nprocs); 679 680 /* Free cache space */ 681 PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 682 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 683 684 baij->svalues = svalues; baij->rvalues = rvalues; 685 baij->nsends = nsends; baij->nrecvs = nreceives; 686 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 687 baij->rmax = nmax; 688 689 PetscFunctionReturn(0); 690 } 691 692 int CreateHashTable(Mat mat,double factor) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 695 Mat A = baij->A, B=baij->B; 696 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 697 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 698 int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 699 int cstart=baij->cstart,*garray=baij->garray,row,col; 700 double *HT,key; 701 extern int PetscGlobalRank; 702 Scalar **HD; 703 /* 704 Scalar *aa=a->a,*ba=b->a; 705 static double *HT; 706 static int flag=1; 707 */ 708 PetscFunctionBegin; 709 baij->ht_size=(int)(factor*nz); 710 size = baij->ht_size; 711 /* Allocate Memory for Hash Table */ 712 if (baij->ht) { 713 PetscFunctionReturn(0); 714 } 715 716 baij->ht = (double*)PetscMalloc(size*(sizeof(double)+sizeof(Scalar*))+1); CHKPTRQ(baij->ht); 717 HT = baij->ht; 718 HD = (Scalar**)(HT + size); 719 PetscMemzero(HT,size*sizeof(double)+sizeof(Scalar*)); 720 721 722 /* Loop Over A */ 723 for ( i=0; i<a->mbs; i++ ) { 724 for ( j=ai[i]; j<ai[i+1]; j++ ) { 725 row = i+rstart; 726 col = aj[j]+cstart; 727 728 key = row*baij->Nbs + col + 1; 729 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 730 h1 = HASH1(size,key); 731 732 for ( k=0; k<size; k++ ){ 733 if (HT[(h1+k)%size] == 0.0) { 734 HT[(h1+k)%size] = key; 735 HD[(h1+k)%size] = a->a + j*bs2; 736 break; 737 } else ct++; 738 } 739 if (k> max1) max1 = k; 740 } 741 } 742 /* Loop Over B */ 743 for ( i=0; i<b->mbs; i++ ) { 744 for ( j=bi[i]; j<bi[i+1]; j++ ) { 745 row = i+rstart; 746 col = garray[bj[j]]; 747 key = row*baij->Nbs + col + 1; 748 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 749 h1 = HASH1(size,key); 750 for ( k=0; k<size; k++ ){ 751 if (HT[(h1+k)%size] == 0.0) { 752 HT[(h1+k)%size] = key; 753 HD[(h1+k)%size] = b->a + j*bs2; 754 break; 755 } else ct++; 756 } 757 if (k> max2) max2 = k; 758 } 759 } 760 761 /* Print Summary */ 762 for ( i=0,key=0.0,j=0; i<size; i++) 763 if (HT[i]) {j++;} 764 765 /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 766 PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 767 fflush(stdout); 768 PetscFunctionReturn(0); 769 } 770 771 #undef __FUNC__ 772 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 773 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 774 { 775 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 776 MPI_Status *send_status,recv_status; 777 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 778 int bs=baij->bs,row,col,other_disassembled,flg; 779 Scalar *values,val; 780 InsertMode addv = mat->insertmode; 781 782 PetscFunctionBegin; 783 /* wait on receives */ 784 while (count) { 785 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 786 /* unpack receives into our local space */ 787 values = baij->rvalues + 3*imdex*baij->rmax; 788 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 789 n = n/3; 790 for ( i=0; i<n; i++ ) { 791 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 792 col = (int) PetscReal(values[3*i+1]); 793 val = values[3*i+2]; 794 if (col >= baij->cstart*bs && col < baij->cend*bs) { 795 col -= baij->cstart*bs; 796 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 797 } else { 798 if (mat->was_assembled) { 799 if (!baij->colmap) { 800 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 801 } 802 col = (baij->colmap[col/bs]) - 1 + col%bs; 803 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 804 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 805 col = (int) PetscReal(values[3*i+1]); 806 } 807 } 808 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 809 } 810 } 811 count--; 812 } 813 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 814 815 /* wait on sends */ 816 if (baij->nsends) { 817 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 818 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 819 PetscFree(send_status); 820 } 821 PetscFree(baij->send_waits); PetscFree(baij->svalues); 822 823 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 824 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 825 826 /* determine if any processor has disassembled, if so we must 827 also disassemble ourselfs, in order that we may reassemble. */ 828 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 829 if (mat->was_assembled && !other_disassembled) { 830 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 831 } 832 833 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 834 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 835 } 836 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 837 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 838 839 ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 840 if (flg && !baij->ht) { 841 double fact = 1.39; 842 CreateHashTable(mat,fact); 843 mat->ops.setvalues = MatSetValues_MPIBAIJ_HT; 844 mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 845 } 846 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNC__ 851 #define __FUNC__ "MatView_MPIBAIJ_Binary" 852 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 853 { 854 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 855 int ierr; 856 857 PetscFunctionBegin; 858 if (baij->size == 1) { 859 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 860 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNC__ 865 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 866 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 867 { 868 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 869 int ierr, format,rank,bs = baij->bs; 870 FILE *fd; 871 ViewerType vtype; 872 873 PetscFunctionBegin; 874 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 875 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 876 ierr = ViewerGetFormat(viewer,&format); 877 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 878 MatInfo info; 879 MPI_Comm_rank(mat->comm,&rank); 880 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 881 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 882 PetscSequentialPhaseBegin(mat->comm,1); 883 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 884 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 885 baij->bs,(int)info.memory); 886 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 887 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 888 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 889 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 890 fflush(fd); 891 PetscSequentialPhaseEnd(mat->comm,1); 892 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 895 PetscPrintf(mat->comm," block size is %d\n",bs); 896 PetscFunctionReturn(0); 897 } 898 } 899 900 if (vtype == DRAW_VIEWER) { 901 Draw draw; 902 PetscTruth isnull; 903 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 904 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 905 } 906 907 if (vtype == ASCII_FILE_VIEWER) { 908 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 909 PetscSequentialPhaseBegin(mat->comm,1); 910 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 911 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 912 baij->cstart*bs,baij->cend*bs); 913 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 914 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 915 fflush(fd); 916 PetscSequentialPhaseEnd(mat->comm,1); 917 } else { 918 int size = baij->size; 919 rank = baij->rank; 920 if (size == 1) { 921 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 922 } else { 923 /* assemble the entire matrix onto first processor. */ 924 Mat A; 925 Mat_SeqBAIJ *Aloc; 926 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 927 int mbs=baij->mbs; 928 Scalar *a; 929 930 if (!rank) { 931 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 932 } else { 933 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 934 } 935 PLogObjectParent(mat,A); 936 937 /* copy over the A part */ 938 Aloc = (Mat_SeqBAIJ*) baij->A->data; 939 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 940 row = baij->rstart; 941 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 942 943 for ( i=0; i<mbs; i++ ) { 944 rvals[0] = bs*(baij->rstart + i); 945 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 946 for ( j=ai[i]; j<ai[i+1]; j++ ) { 947 col = (baij->cstart+aj[j])*bs; 948 for (k=0; k<bs; k++ ) { 949 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 950 col++; a += bs; 951 } 952 } 953 } 954 /* copy over the B part */ 955 Aloc = (Mat_SeqBAIJ*) baij->B->data; 956 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 957 row = baij->rstart*bs; 958 for ( i=0; i<mbs; i++ ) { 959 rvals[0] = bs*(baij->rstart + i); 960 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 961 for ( j=ai[i]; j<ai[i+1]; j++ ) { 962 col = baij->garray[aj[j]]*bs; 963 for (k=0; k<bs; k++ ) { 964 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 965 col++; a += bs; 966 } 967 } 968 } 969 PetscFree(rvals); 970 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 971 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 972 /* 973 Everyone has to call to draw the matrix since the graphics waits are 974 synchronized across all processors that share the Draw object 975 */ 976 if (!rank || vtype == DRAW_VIEWER) { 977 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 978 } 979 ierr = MatDestroy(A); CHKERRQ(ierr); 980 } 981 } 982 PetscFunctionReturn(0); 983 } 984 985 986 987 #undef __FUNC__ 988 #define __FUNC__ "MatView_MPIBAIJ" 989 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 990 { 991 Mat mat = (Mat) obj; 992 int ierr; 993 ViewerType vtype; 994 995 PetscFunctionBegin; 996 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 997 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 998 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 999 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1000 } else if (vtype == BINARY_FILE_VIEWER) { 1001 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1002 } 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNC__ 1007 #define __FUNC__ "MatDestroy_MPIBAIJ" 1008 int MatDestroy_MPIBAIJ(PetscObject obj) 1009 { 1010 Mat mat = (Mat) obj; 1011 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1012 int ierr; 1013 1014 PetscFunctionBegin; 1015 #if defined(USE_PETSC_LOG) 1016 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 1017 #endif 1018 1019 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1020 PetscFree(baij->rowners); 1021 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1022 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1023 if (baij->colmap) PetscFree(baij->colmap); 1024 if (baij->garray) PetscFree(baij->garray); 1025 if (baij->lvec) VecDestroy(baij->lvec); 1026 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1027 if (baij->rowvalues) PetscFree(baij->rowvalues); 1028 if (baij->barray) PetscFree(baij->barray); 1029 if (baij->ht) PetscFree(baij->ht); 1030 PetscFree(baij); 1031 PLogObjectDestroy(mat); 1032 PetscHeaderDestroy(mat); 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #undef __FUNC__ 1037 #define __FUNC__ "MatMult_MPIBAIJ" 1038 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1039 { 1040 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1041 int ierr, nt; 1042 1043 PetscFunctionBegin; 1044 VecGetLocalSize_Fast(xx,nt); 1045 if (nt != a->n) { 1046 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1047 } 1048 VecGetLocalSize_Fast(yy,nt); 1049 if (nt != a->m) { 1050 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1051 } 1052 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1053 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 1054 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1055 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1056 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1057 PetscFunctionReturn(0); 1058 } 1059 1060 #undef __FUNC__ 1061 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1062 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1063 { 1064 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1065 int ierr; 1066 1067 PetscFunctionBegin; 1068 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1069 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1071 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNC__ 1076 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1077 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1078 { 1079 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1080 int ierr; 1081 1082 PetscFunctionBegin; 1083 /* do nondiagonal part */ 1084 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1085 /* send it on its way */ 1086 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1087 /* do local part */ 1088 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1089 /* receive remote parts: note this assumes the values are not actually */ 1090 /* inserted in yy until the next line, which is true for my implementation*/ 1091 /* but is not perhaps always true. */ 1092 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNC__ 1097 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1098 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1099 { 1100 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1101 int ierr; 1102 1103 PetscFunctionBegin; 1104 /* do nondiagonal part */ 1105 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1106 /* send it on its way */ 1107 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1108 /* do local part */ 1109 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1110 /* receive remote parts: note this assumes the values are not actually */ 1111 /* inserted in yy until the next line, which is true for my implementation*/ 1112 /* but is not perhaps always true. */ 1113 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 /* 1118 This only works correctly for square matrices where the subblock A->A is the 1119 diagonal block 1120 */ 1121 #undef __FUNC__ 1122 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1123 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1124 { 1125 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1126 int ierr; 1127 1128 PetscFunctionBegin; 1129 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1130 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNC__ 1135 #define __FUNC__ "MatScale_MPIBAIJ" 1136 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1137 { 1138 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1143 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNC__ 1148 #define __FUNC__ "MatGetSize_MPIBAIJ" 1149 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1150 { 1151 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1152 1153 PetscFunctionBegin; 1154 if (m) *m = mat->M; 1155 if (n) *n = mat->N; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1161 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1162 { 1163 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1164 1165 PetscFunctionBegin; 1166 *m = mat->m; *n = mat->N; 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNC__ 1171 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1172 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1173 { 1174 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1175 1176 PetscFunctionBegin; 1177 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1178 PetscFunctionReturn(0); 1179 } 1180 1181 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1182 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1183 1184 #undef __FUNC__ 1185 #define __FUNC__ "MatGetRow_MPIBAIJ" 1186 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1187 { 1188 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1189 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1190 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1191 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1192 int *cmap, *idx_p,cstart = mat->cstart; 1193 1194 PetscFunctionBegin; 1195 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1196 mat->getrowactive = PETSC_TRUE; 1197 1198 if (!mat->rowvalues && (idx || v)) { 1199 /* 1200 allocate enough space to hold information from the longest row. 1201 */ 1202 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1203 int max = 1,mbs = mat->mbs,tmp; 1204 for ( i=0; i<mbs; i++ ) { 1205 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1206 if (max < tmp) { max = tmp; } 1207 } 1208 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1209 CHKPTRQ(mat->rowvalues); 1210 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1211 } 1212 1213 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1214 lrow = row - brstart; 1215 1216 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1217 if (!v) {pvA = 0; pvB = 0;} 1218 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1219 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1220 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1221 nztot = nzA + nzB; 1222 1223 cmap = mat->garray; 1224 if (v || idx) { 1225 if (nztot) { 1226 /* Sort by increasing column numbers, assuming A and B already sorted */ 1227 int imark = -1; 1228 if (v) { 1229 *v = v_p = mat->rowvalues; 1230 for ( i=0; i<nzB; i++ ) { 1231 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1232 else break; 1233 } 1234 imark = i; 1235 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1236 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1237 } 1238 if (idx) { 1239 *idx = idx_p = mat->rowindices; 1240 if (imark > -1) { 1241 for ( i=0; i<imark; i++ ) { 1242 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1243 } 1244 } else { 1245 for ( i=0; i<nzB; i++ ) { 1246 if (cmap[cworkB[i]/bs] < cstart) 1247 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1248 else break; 1249 } 1250 imark = i; 1251 } 1252 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1253 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1254 } 1255 } else { 1256 if (idx) *idx = 0; 1257 if (v) *v = 0; 1258 } 1259 } 1260 *nz = nztot; 1261 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1262 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNC__ 1267 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1268 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1269 { 1270 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1271 1272 PetscFunctionBegin; 1273 if (baij->getrowactive == PETSC_FALSE) { 1274 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1275 } 1276 baij->getrowactive = PETSC_FALSE; 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNC__ 1281 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1282 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1283 { 1284 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1285 1286 PetscFunctionBegin; 1287 *bs = baij->bs; 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNC__ 1292 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1293 int MatZeroEntries_MPIBAIJ(Mat A) 1294 { 1295 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1296 int ierr; 1297 1298 PetscFunctionBegin; 1299 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1300 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNC__ 1305 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1306 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1307 { 1308 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1309 Mat A = a->A, B = a->B; 1310 int ierr; 1311 double isend[5], irecv[5]; 1312 1313 PetscFunctionBegin; 1314 info->block_size = (double)a->bs; 1315 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1316 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1317 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1318 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1319 if (flag == MAT_LOCAL) { 1320 info->nz_used = isend[0]; 1321 info->nz_allocated = isend[1]; 1322 info->nz_unneeded = isend[2]; 1323 info->memory = isend[3]; 1324 info->mallocs = isend[4]; 1325 } else if (flag == MAT_GLOBAL_MAX) { 1326 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1327 info->nz_used = irecv[0]; 1328 info->nz_allocated = irecv[1]; 1329 info->nz_unneeded = irecv[2]; 1330 info->memory = irecv[3]; 1331 info->mallocs = irecv[4]; 1332 } else if (flag == MAT_GLOBAL_SUM) { 1333 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1334 info->nz_used = irecv[0]; 1335 info->nz_allocated = irecv[1]; 1336 info->nz_unneeded = irecv[2]; 1337 info->memory = irecv[3]; 1338 info->mallocs = irecv[4]; 1339 } 1340 info->rows_global = (double)a->M; 1341 info->columns_global = (double)a->N; 1342 info->rows_local = (double)a->m; 1343 info->columns_local = (double)a->N; 1344 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1345 info->fill_ratio_needed = 0; 1346 info->factor_mallocs = 0; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNC__ 1351 #define __FUNC__ "MatSetOption_MPIBAIJ" 1352 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1353 { 1354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1355 1356 PetscFunctionBegin; 1357 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1358 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1359 op == MAT_COLUMNS_UNSORTED || 1360 op == MAT_COLUMNS_SORTED || 1361 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1362 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1363 MatSetOption(a->A,op); 1364 MatSetOption(a->B,op); 1365 } else if (op == MAT_ROW_ORIENTED) { 1366 a->roworiented = 1; 1367 MatSetOption(a->A,op); 1368 MatSetOption(a->B,op); 1369 } else if (op == MAT_ROWS_SORTED || 1370 op == MAT_ROWS_UNSORTED || 1371 op == MAT_SYMMETRIC || 1372 op == MAT_STRUCTURALLY_SYMMETRIC || 1373 op == MAT_YES_NEW_DIAGONALS) 1374 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1375 else if (op == MAT_COLUMN_ORIENTED) { 1376 a->roworiented = 0; 1377 MatSetOption(a->A,op); 1378 MatSetOption(a->B,op); 1379 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1380 a->donotstash = 1; 1381 } else if (op == MAT_NO_NEW_DIAGONALS) { 1382 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1383 } else { 1384 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNC__ 1390 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1391 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1392 { 1393 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1394 Mat_SeqBAIJ *Aloc; 1395 Mat B; 1396 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1397 int bs=baij->bs,mbs=baij->mbs; 1398 Scalar *a; 1399 1400 PetscFunctionBegin; 1401 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1402 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1403 CHKERRQ(ierr); 1404 1405 /* copy over the A part */ 1406 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1407 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1408 row = baij->rstart; 1409 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1410 1411 for ( i=0; i<mbs; i++ ) { 1412 rvals[0] = bs*(baij->rstart + i); 1413 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1414 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1415 col = (baij->cstart+aj[j])*bs; 1416 for (k=0; k<bs; k++ ) { 1417 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1418 col++; a += bs; 1419 } 1420 } 1421 } 1422 /* copy over the B part */ 1423 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1424 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1425 row = baij->rstart*bs; 1426 for ( i=0; i<mbs; i++ ) { 1427 rvals[0] = bs*(baij->rstart + i); 1428 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1429 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1430 col = baij->garray[aj[j]]*bs; 1431 for (k=0; k<bs; k++ ) { 1432 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1433 col++; a += bs; 1434 } 1435 } 1436 } 1437 PetscFree(rvals); 1438 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1439 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1440 1441 if (matout != PETSC_NULL) { 1442 *matout = B; 1443 } else { 1444 /* This isn't really an in-place transpose .... but free data structures from baij */ 1445 PetscFree(baij->rowners); 1446 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1447 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1448 if (baij->colmap) PetscFree(baij->colmap); 1449 if (baij->garray) PetscFree(baij->garray); 1450 if (baij->lvec) VecDestroy(baij->lvec); 1451 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1452 PetscFree(baij); 1453 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1454 PetscHeaderDestroy(B); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNC__ 1460 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1461 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1462 { 1463 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1464 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1465 int ierr,s1,s2,s3; 1466 1467 PetscFunctionBegin; 1468 if (ll) { 1469 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1470 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1471 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1472 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1473 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1474 } 1475 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNC__ 1480 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1481 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1482 { 1483 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1484 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1485 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1486 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1487 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1488 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1489 MPI_Comm comm = A->comm; 1490 MPI_Request *send_waits,*recv_waits; 1491 MPI_Status recv_status,*send_status; 1492 IS istmp; 1493 1494 PetscFunctionBegin; 1495 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1496 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1497 1498 /* first count number of contributors to each processor */ 1499 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1500 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1501 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1502 for ( i=0; i<N; i++ ) { 1503 idx = rows[i]; 1504 found = 0; 1505 for ( j=0; j<size; j++ ) { 1506 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1507 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1508 } 1509 } 1510 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1511 } 1512 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1513 1514 /* inform other processors of number of messages and max length*/ 1515 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1516 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1517 nrecvs = work[rank]; 1518 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1519 nmax = work[rank]; 1520 PetscFree(work); 1521 1522 /* post receives: */ 1523 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1524 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1525 for ( i=0; i<nrecvs; i++ ) { 1526 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1527 } 1528 1529 /* do sends: 1530 1) starts[i] gives the starting index in svalues for stuff going to 1531 the ith processor 1532 */ 1533 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1534 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1535 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1536 starts[0] = 0; 1537 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1538 for ( i=0; i<N; i++ ) { 1539 svalues[starts[owner[i]]++] = rows[i]; 1540 } 1541 ISRestoreIndices(is,&rows); 1542 1543 starts[0] = 0; 1544 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1545 count = 0; 1546 for ( i=0; i<size; i++ ) { 1547 if (procs[i]) { 1548 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1549 } 1550 } 1551 PetscFree(starts); 1552 1553 base = owners[rank]*bs; 1554 1555 /* wait on receives */ 1556 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1557 source = lens + nrecvs; 1558 count = nrecvs; slen = 0; 1559 while (count) { 1560 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1561 /* unpack receives into our local space */ 1562 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1563 source[imdex] = recv_status.MPI_SOURCE; 1564 lens[imdex] = n; 1565 slen += n; 1566 count--; 1567 } 1568 PetscFree(recv_waits); 1569 1570 /* move the data into the send scatter */ 1571 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1572 count = 0; 1573 for ( i=0; i<nrecvs; i++ ) { 1574 values = rvalues + i*nmax; 1575 for ( j=0; j<lens[i]; j++ ) { 1576 lrows[count++] = values[j] - base; 1577 } 1578 } 1579 PetscFree(rvalues); PetscFree(lens); 1580 PetscFree(owner); PetscFree(nprocs); 1581 1582 /* actually zap the local rows */ 1583 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1584 PLogObjectParent(A,istmp); 1585 1586 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1587 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1588 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1589 1590 if (diag) { 1591 for ( i = 0; i < slen; i++ ) { 1592 row = lrows[i] + rstart_bs; 1593 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1594 } 1595 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1596 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1597 } 1598 PetscFree(lrows); 1599 1600 /* wait on sends */ 1601 if (nsends) { 1602 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1603 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1604 PetscFree(send_status); 1605 } 1606 PetscFree(send_waits); PetscFree(svalues); 1607 1608 PetscFunctionReturn(0); 1609 } 1610 extern int MatPrintHelp_SeqBAIJ(Mat); 1611 #undef __FUNC__ 1612 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1613 int MatPrintHelp_MPIBAIJ(Mat A) 1614 { 1615 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1616 int ierr; 1617 1618 PetscFunctionBegin; 1619 if (!a->rank) { 1620 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNC__ 1626 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1627 int MatSetUnfactored_MPIBAIJ(Mat A) 1628 { 1629 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1630 int ierr; 1631 1632 PetscFunctionBegin; 1633 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1634 PetscFunctionReturn(0); 1635 } 1636 1637 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1638 1639 /* -------------------------------------------------------------------*/ 1640 static struct _MatOps MatOps = { 1641 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1642 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1643 0,0,0,0, 1644 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1645 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1646 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1647 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1648 0,0,0,MatGetSize_MPIBAIJ, 1649 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1650 0,0,MatConvertSameType_MPIBAIJ,0,0, 1651 0,0,0,MatGetSubMatrices_MPIBAIJ, 1652 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1653 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1654 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1655 1656 1657 #undef __FUNC__ 1658 #define __FUNC__ "MatCreateMPIBAIJ" 1659 /*@C 1660 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1661 (block compressed row). For good matrix assembly performance 1662 the user should preallocate the matrix storage by setting the parameters 1663 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1664 performance can be increased by more than a factor of 50. 1665 1666 Input Parameters: 1667 . comm - MPI communicator 1668 . bs - size of blockk 1669 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1670 This value should be the same as the local size used in creating the 1671 y vector for the matrix-vector product y = Ax. 1672 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1673 This value should be the same as the local size used in creating the 1674 x vector for the matrix-vector product y = Ax. 1675 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1676 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1677 . d_nz - number of block nonzeros per block row in diagonal portion of local 1678 submatrix (same for all local rows) 1679 . d_nzz - array containing the number of block nonzeros in the various block rows 1680 of the in diagonal portion of the local (possibly different for each block 1681 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1682 it is zero. 1683 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1684 submatrix (same for all local rows). 1685 . o_nzz - array containing the number of nonzeros in the various block rows of the 1686 off-diagonal portion of the local submatrix (possibly different for 1687 each block row) or PETSC_NULL. 1688 1689 Output Parameter: 1690 . A - the matrix 1691 1692 Notes: 1693 The user MUST specify either the local or global matrix dimensions 1694 (possibly both). 1695 1696 Storage Information: 1697 For a square global matrix we define each processor's diagonal portion 1698 to be its local rows and the corresponding columns (a square submatrix); 1699 each processor's off-diagonal portion encompasses the remainder of the 1700 local matrix (a rectangular submatrix). 1701 1702 The user can specify preallocated storage for the diagonal part of 1703 the local submatrix with either d_nz or d_nnz (not both). Set 1704 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1705 memory allocation. Likewise, specify preallocated storage for the 1706 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1707 1708 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1709 the figure below we depict these three local rows and all columns (0-11). 1710 1711 $ 0 1 2 3 4 5 6 7 8 9 10 11 1712 $ ------------------- 1713 $ row 3 | o o o d d d o o o o o o 1714 $ row 4 | o o o d d d o o o o o o 1715 $ row 5 | o o o d d d o o o o o o 1716 $ ------------------- 1717 $ 1718 1719 Thus, any entries in the d locations are stored in the d (diagonal) 1720 submatrix, and any entries in the o locations are stored in the 1721 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1722 stored simply in the MATSEQBAIJ format for compressed row storage. 1723 1724 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1725 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1726 In general, for PDE problems in which most nonzeros are near the diagonal, 1727 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1728 or you will get TERRIBLE performance; see the users' manual chapter on 1729 matrices. 1730 1731 .keywords: matrix, block, aij, compressed row, sparse, parallel 1732 1733 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1734 @*/ 1735 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1736 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1737 { 1738 Mat B; 1739 Mat_MPIBAIJ *b; 1740 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1741 1742 PetscFunctionBegin; 1743 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1744 1745 MPI_Comm_size(comm,&size); 1746 if (size == 1) { 1747 if (M == PETSC_DECIDE) M = m; 1748 if (N == PETSC_DECIDE) N = n; 1749 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 1753 *A = 0; 1754 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 1755 PLogObjectCreate(B); 1756 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1757 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1758 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1759 1760 B->destroy = MatDestroy_MPIBAIJ; 1761 B->view = MatView_MPIBAIJ; 1762 B->mapping = 0; 1763 B->factor = 0; 1764 B->assembled = PETSC_FALSE; 1765 1766 B->insertmode = NOT_SET_VALUES; 1767 MPI_Comm_rank(comm,&b->rank); 1768 MPI_Comm_size(comm,&b->size); 1769 1770 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1771 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1772 } 1773 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1774 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1775 } 1776 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1777 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1778 } 1779 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1780 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1781 1782 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1783 work[0] = m; work[1] = n; 1784 mbs = m/bs; nbs = n/bs; 1785 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1786 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1787 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1788 } 1789 if (m == PETSC_DECIDE) { 1790 Mbs = M/bs; 1791 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1792 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1793 m = mbs*bs; 1794 } 1795 if (n == PETSC_DECIDE) { 1796 Nbs = N/bs; 1797 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1798 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1799 n = nbs*bs; 1800 } 1801 if (mbs*bs != m || nbs*bs != n) { 1802 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1803 } 1804 1805 b->m = m; B->m = m; 1806 b->n = n; B->n = n; 1807 b->N = N; B->N = N; 1808 b->M = M; B->M = M; 1809 b->bs = bs; 1810 b->bs2 = bs*bs; 1811 b->mbs = mbs; 1812 b->nbs = nbs; 1813 b->Mbs = Mbs; 1814 b->Nbs = Nbs; 1815 1816 /* build local table of row and column ownerships */ 1817 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1818 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1819 b->cowners = b->rowners + b->size + 2; 1820 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1821 b->rowners[0] = 0; 1822 for ( i=2; i<=b->size; i++ ) { 1823 b->rowners[i] += b->rowners[i-1]; 1824 } 1825 b->rstart = b->rowners[b->rank]; 1826 b->rend = b->rowners[b->rank+1]; 1827 b->rstart_bs = b->rstart * bs; 1828 b->rend_bs = b->rend * bs; 1829 1830 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1831 b->cowners[0] = 0; 1832 for ( i=2; i<=b->size; i++ ) { 1833 b->cowners[i] += b->cowners[i-1]; 1834 } 1835 b->cstart = b->cowners[b->rank]; 1836 b->cend = b->cowners[b->rank+1]; 1837 b->cstart_bs = b->cstart * bs; 1838 b->cend_bs = b->cend * bs; 1839 1840 1841 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1842 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1843 PLogObjectParent(B,b->A); 1844 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1845 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1846 PLogObjectParent(B,b->B); 1847 1848 /* build cache for off array entries formed */ 1849 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1850 b->donotstash = 0; 1851 b->colmap = 0; 1852 b->garray = 0; 1853 b->roworiented = 1; 1854 1855 /* stuff used in block assembly */ 1856 b->barray = 0; 1857 1858 /* stuff used for matrix vector multiply */ 1859 b->lvec = 0; 1860 b->Mvctx = 0; 1861 1862 /* stuff for MatGetRow() */ 1863 b->rowindices = 0; 1864 b->rowvalues = 0; 1865 b->getrowactive = PETSC_FALSE; 1866 1867 /* hash table stuff */ 1868 b->ht = 0; 1869 b->ht_size = 0; 1870 1871 *A = B; 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNC__ 1876 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 1877 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 1878 { 1879 Mat mat; 1880 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 1881 int ierr, len=0, flg; 1882 1883 PetscFunctionBegin; 1884 *newmat = 0; 1885 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 1886 PLogObjectCreate(mat); 1887 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 1888 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1889 mat->destroy = MatDestroy_MPIBAIJ; 1890 mat->view = MatView_MPIBAIJ; 1891 mat->factor = matin->factor; 1892 mat->assembled = PETSC_TRUE; 1893 1894 a->m = mat->m = oldmat->m; 1895 a->n = mat->n = oldmat->n; 1896 a->M = mat->M = oldmat->M; 1897 a->N = mat->N = oldmat->N; 1898 1899 a->bs = oldmat->bs; 1900 a->bs2 = oldmat->bs2; 1901 a->mbs = oldmat->mbs; 1902 a->nbs = oldmat->nbs; 1903 a->Mbs = oldmat->Mbs; 1904 a->Nbs = oldmat->Nbs; 1905 1906 a->rstart = oldmat->rstart; 1907 a->rend = oldmat->rend; 1908 a->cstart = oldmat->cstart; 1909 a->cend = oldmat->cend; 1910 a->size = oldmat->size; 1911 a->rank = oldmat->rank; 1912 mat->insertmode = NOT_SET_VALUES; 1913 a->rowvalues = 0; 1914 a->getrowactive = PETSC_FALSE; 1915 a->barray = 0; 1916 1917 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1918 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1919 a->cowners = a->rowners + a->size + 2; 1920 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1921 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1922 if (oldmat->colmap) { 1923 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1924 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1925 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 1926 } else a->colmap = 0; 1927 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 1928 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1929 PLogObjectMemory(mat,len*sizeof(int)); 1930 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1931 } else a->garray = 0; 1932 1933 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1934 PLogObjectParent(mat,a->lvec); 1935 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1936 PLogObjectParent(mat,a->Mvctx); 1937 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1938 PLogObjectParent(mat,a->A); 1939 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1940 PLogObjectParent(mat,a->B); 1941 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1942 if (flg) { 1943 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1944 } 1945 *newmat = mat; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #include "sys.h" 1950 1951 #undef __FUNC__ 1952 #define __FUNC__ "MatLoad_MPIBAIJ" 1953 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 1954 { 1955 Mat A; 1956 int i, nz, ierr, j,rstart, rend, fd; 1957 Scalar *vals,*buf; 1958 MPI_Comm comm = ((PetscObject)viewer)->comm; 1959 MPI_Status status; 1960 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1961 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 1962 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 1963 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1964 int dcount,kmax,k,nzcount,tmp; 1965 1966 PetscFunctionBegin; 1967 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1968 bs2 = bs*bs; 1969 1970 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1971 if (!rank) { 1972 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1973 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1974 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1975 if (header[3] < 0) { 1976 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 1977 } 1978 } 1979 1980 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1981 M = header[1]; N = header[2]; 1982 1983 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1984 1985 /* 1986 This code adds extra rows to make sure the number of rows is 1987 divisible by the blocksize 1988 */ 1989 Mbs = M/bs; 1990 extra_rows = bs - M + bs*(Mbs); 1991 if (extra_rows == bs) extra_rows = 0; 1992 else Mbs++; 1993 if (extra_rows &&!rank) { 1994 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 1995 } 1996 1997 /* determine ownership of all rows */ 1998 mbs = Mbs/size + ((Mbs % size) > rank); 1999 m = mbs * bs; 2000 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2001 browners = rowners + size + 1; 2002 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2003 rowners[0] = 0; 2004 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2005 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2006 rstart = rowners[rank]; 2007 rend = rowners[rank+1]; 2008 2009 /* distribute row lengths to all processors */ 2010 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2011 if (!rank) { 2012 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2013 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2014 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2015 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2016 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2017 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2018 PetscFree(sndcounts); 2019 } else { 2020 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2021 } 2022 2023 if (!rank) { 2024 /* calculate the number of nonzeros on each processor */ 2025 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2026 PetscMemzero(procsnz,size*sizeof(int)); 2027 for ( i=0; i<size; i++ ) { 2028 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2029 procsnz[i] += rowlengths[j]; 2030 } 2031 } 2032 PetscFree(rowlengths); 2033 2034 /* determine max buffer needed and allocate it */ 2035 maxnz = 0; 2036 for ( i=0; i<size; i++ ) { 2037 maxnz = PetscMax(maxnz,procsnz[i]); 2038 } 2039 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2040 2041 /* read in my part of the matrix column indices */ 2042 nz = procsnz[0]; 2043 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2044 mycols = ibuf; 2045 if (size == 1) nz -= extra_rows; 2046 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2047 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2048 2049 /* read in every ones (except the last) and ship off */ 2050 for ( i=1; i<size-1; i++ ) { 2051 nz = procsnz[i]; 2052 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2053 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2054 } 2055 /* read in the stuff for the last proc */ 2056 if ( size != 1 ) { 2057 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2058 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2059 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2060 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2061 } 2062 PetscFree(cols); 2063 } else { 2064 /* determine buffer space needed for message */ 2065 nz = 0; 2066 for ( i=0; i<m; i++ ) { 2067 nz += locrowlens[i]; 2068 } 2069 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2070 mycols = ibuf; 2071 /* receive message of column indices*/ 2072 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2073 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2074 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2075 } 2076 2077 /* loop over local rows, determining number of off diagonal entries */ 2078 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2079 odlens = dlens + (rend-rstart); 2080 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2081 PetscMemzero(mask,3*Mbs*sizeof(int)); 2082 masked1 = mask + Mbs; 2083 masked2 = masked1 + Mbs; 2084 rowcount = 0; nzcount = 0; 2085 for ( i=0; i<mbs; i++ ) { 2086 dcount = 0; 2087 odcount = 0; 2088 for ( j=0; j<bs; j++ ) { 2089 kmax = locrowlens[rowcount]; 2090 for ( k=0; k<kmax; k++ ) { 2091 tmp = mycols[nzcount++]/bs; 2092 if (!mask[tmp]) { 2093 mask[tmp] = 1; 2094 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2095 else masked1[dcount++] = tmp; 2096 } 2097 } 2098 rowcount++; 2099 } 2100 2101 dlens[i] = dcount; 2102 odlens[i] = odcount; 2103 2104 /* zero out the mask elements we set */ 2105 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2106 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2107 } 2108 2109 /* create our matrix */ 2110 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2111 CHKERRQ(ierr); 2112 A = *newmat; 2113 MatSetOption(A,MAT_COLUMNS_SORTED); 2114 2115 if (!rank) { 2116 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2117 /* read in my part of the matrix numerical values */ 2118 nz = procsnz[0]; 2119 vals = buf; 2120 mycols = ibuf; 2121 if (size == 1) nz -= extra_rows; 2122 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2123 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2124 2125 /* insert into matrix */ 2126 jj = rstart*bs; 2127 for ( i=0; i<m; i++ ) { 2128 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2129 mycols += locrowlens[i]; 2130 vals += locrowlens[i]; 2131 jj++; 2132 } 2133 /* read in other processors (except the last one) and ship out */ 2134 for ( i=1; i<size-1; i++ ) { 2135 nz = procsnz[i]; 2136 vals = buf; 2137 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2138 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2139 } 2140 /* the last proc */ 2141 if ( size != 1 ){ 2142 nz = procsnz[i] - extra_rows; 2143 vals = buf; 2144 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2145 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2146 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2147 } 2148 PetscFree(procsnz); 2149 } else { 2150 /* receive numeric values */ 2151 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2152 2153 /* receive message of values*/ 2154 vals = buf; 2155 mycols = ibuf; 2156 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2157 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2158 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2159 2160 /* insert into matrix */ 2161 jj = rstart*bs; 2162 for ( i=0; i<m; i++ ) { 2163 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2164 mycols += locrowlens[i]; 2165 vals += locrowlens[i]; 2166 jj++; 2167 } 2168 } 2169 PetscFree(locrowlens); 2170 PetscFree(buf); 2171 PetscFree(ibuf); 2172 PetscFree(rowners); 2173 PetscFree(dlens); 2174 PetscFree(mask); 2175 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2176 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2177 PetscFunctionReturn(0); 2178 } 2179 2180 2181