#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.94 1998/01/12 17:10:22 balay Exp balay $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/baij/mpi/mpibaij.h"
#include "src/vec/vecimpl.h"


extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;

  PetscFunctionBegin;
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
      PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
      PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                  aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = 
aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 
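  /*
     Routing of entries: for a locally owned row, a value whose global
     column lies in [cstart_bs,cend_bs) goes into the diagonal block
     baij->A; any other column goes into the off-diagonal block baij->B
     (translated through colmap); rows owned by another processor are
     stashed and shipped during MatAssemblyBegin/End().  A minimal
     caller sketch (the indices and value below are illustrative only):

         int    row = 3, col = 7;
         Scalar val = 1.0;
         ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
         ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
         ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */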
Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 
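    /* index bounds are checked only in debugging builds (BOPT=g) */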
294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 377 378 #undef __FUNC__ 379 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 380 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 381 { 382 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 383 int ierr,i,j,row,col; 384 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 385 int rend_orig=baij->rend_bs; 386 387 int h1,key,size=baij->ht_size,k,bs=baij->bs; 388 double * HT = baij->ht; 389 Scalar ** HD = (Scalar **)(HT + size),value; 390 391 392 PetscFunctionBegin; 393 394 if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 if (im[i] >= rstart_orig && im[i] < rend_orig) { 402 row = im[i]; 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; 
else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*baij->Nbs+(col/bs)+1;
        h1  = HASH1(size,key);

        for ( k=0; k<size; k++ ){
          if (HT[(h1+k)%size] == key) {
            if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value;
            else                    *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs)  = value;
            break;
          }
        }
        if (k == size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hash table entry not found");
      }
    } else {
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;

  int         h1,key,size=baij->ht_size;
  double      *HT = baij->ht;
  Scalar      **HD = (Scalar **)(HT + size),*value,*baij_a;

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i];
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*baij->Nbs+col+1;
        h1  = HASH1(size,key);

        for ( k=0; k<size; k++ ){
          if (HT[(h1+k)%size] == key) {
            baij_a = HD[(h1+k)%size];

            if (roworiented) {
              value = v + i*(stepval+bs)*bs + j*bs;
              for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) {
                for ( jj=0; jj<bs2; jj+=bs ) {
                  if (addv == ADD_VALUES) baij_a[jj] += *value++;
                  else                    baij_a[jj]  = *value++;
                }
              }

            } else {
              value = v + j*(stepval+bs)*bs + i*bs;
              for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
                for ( jj=0; jj<bs; jj++ ) {
                  if (addv == ADD_VALUES) baij_a[jj] += *value++;
                  else                    baij_a[jj]  = *value++;
                }
              }
            }
            break;
          }
        }
        if (k == size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hash table entry not found");
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = 
baij->rend*bs; 529 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 530 531 for ( i=0; i<m; i++ ) { 532 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 533 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 534 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 535 row = idxm[i] - bsrstart; 536 for ( j=0; j<n; j++ ) { 537 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 538 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 539 if (idxn[j] >= bscstart && idxn[j] < bscend){ 540 col = idxn[j] - bscstart; 541 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 542 } else { 543 if (!baij->colmap) { 544 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 545 } 546 if((baij->colmap[idxn[j]/bs]-1 < 0) || 547 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 548 else { 549 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 550 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 551 } 552 } 553 } 554 } else { 555 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 556 } 557 } 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNC__ 562 #define __FUNC__ "MatNorm_MPIBAIJ" 563 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 564 { 565 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 566 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 567 int ierr, i,bs2=baij->bs2; 568 double sum = 0.0; 569 Scalar *v; 570 571 PetscFunctionBegin; 572 if (baij->size == 1) { 573 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 574 } else { 575 if (type == NORM_FROBENIUS) { 576 v = amat->a; 577 for (i=0; i<amat->nz*bs2; i++ ) { 578 #if defined(USE_PETSC_COMPLEX) 579 sum += real(conj(*v)*(*v)); v++; 580 #else 581 sum += (*v)*(*v); v++; 582 #endif 583 } 584 v = bmat->a; 585 for (i=0; i<bmat->nz*bs2; i++ ) { 586 #if defined(USE_PETSC_COMPLEX) 587 sum += real(conj(*v)*(*v)); v++; 588 #else 589 sum += (*v)*(*v); v++; 590 #endif 591 } 592 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 593 *norm = sqrt(*norm); 594 } else { 595 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 596 } 597 } 598 PetscFunctionReturn(0); 599 } 600 601 #undef __FUNC__ 602 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 603 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 604 { 605 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 606 MPI_Comm comm = mat->comm; 607 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 608 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 609 MPI_Request *send_waits,*recv_waits; 610 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 611 InsertMode addv; 612 Scalar *rvalues,*svalues; 613 614 PetscFunctionBegin; 615 /* make sure all processors are either in INSERTMODE or ADDMODE */ 616 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 617 if (addv == (ADD_VALUES|INSERT_VALUES)) { 618 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 619 } 620 mat->insertmode = addv; /* in case this processor had no cache */ 621 622 /* first count number of contributors to each processor */ 623 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 624 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 625 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 626 for ( i=0; i<baij->stash.n; i++ ) { 627 idx = baij->stash.idx[i]; 628 for ( 
j=0; j<size; j++ ) { 629 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 630 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 631 } 632 } 633 } 634 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 635 636 /* inform other processors of number of messages and max length*/ 637 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 638 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 639 nreceives = work[rank]; 640 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 641 nmax = work[rank]; 642 PetscFree(work); 643 644 /* post receives: 645 1) each message will consist of ordered pairs 646 (global index,value) we store the global index as a double 647 to simplify the message passing. 648 2) since we don't know how long each individual message is we 649 allocate the largest needed buffer for each receive. Potentially 650 this is a lot of wasted space. 651 652 653 This could be done better. 654 */ 655 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 656 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 657 for ( i=0; i<nreceives; i++ ) { 658 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 659 } 660 661 /* do sends: 662 1) starts[i] gives the starting index in svalues for stuff going to 663 the ith processor 664 */ 665 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 666 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 667 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 668 starts[0] = 0; 669 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 670 for ( i=0; i<baij->stash.n; i++ ) { 671 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 672 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 673 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 674 } 675 PetscFree(owner); 676 starts[0] = 0; 677 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 678 count = 0; 679 for ( i=0; i<size; i++ ) { 680 if (procs[i]) { 681 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 682 } 683 } 684 PetscFree(starts); PetscFree(nprocs); 685 686 /* Free cache space */ 687 PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 688 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 689 690 baij->svalues = svalues; baij->rvalues = rvalues; 691 baij->nsends = nsends; baij->nrecvs = nreceives; 692 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 693 baij->rmax = nmax; 694 695 PetscFunctionReturn(0); 696 } 697 698 int CreateHashTable(Mat mat,double factor) 699 { 700 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 701 Mat A = baij->A, B=baij->B; 702 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 703 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 704 int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 705 int cstart=baij->cstart,*garray=baij->garray,row,col; 706 double *HT,key; 707 extern int PetscGlobalRank; 708 Scalar **HD; 709 /* 710 Scalar *aa=a->a,*ba=b->a; 711 static double *HT; 712 static int flag=1; 713 */ 714 PetscFunctionBegin; 715 baij->ht_size=(int)(factor*nz); 716 size = baij->ht_size; 717 /* Allocate Memory for Hash Table */ 718 if (baij->ht) { 719 PetscFunctionReturn(0); 720 } 
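  /*
     The table uses multiplicative hashing (Knuth's golden-ratio scheme)
     with linear probing: HASH1(size,key) gives the initial slot
     h1 = floor(size*frac(key*0.6180339887)), and a search then probes
     slots (h1+k) % size for k = 0,1,2,... until the key (on lookup) or
     an empty slot (on insertion) is found.  For example (numbers
     illustrative only), with size = 100 and key = 42 we get
     frac(42*0.6180339887) = .957..., so probing starts at slot 95.
  */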
721 722 baij->ht = (double*)PetscMalloc(size*(sizeof(double)+sizeof(Scalar*))+1); CHKPTRQ(baij->ht); 723 HT = baij->ht; 724 HD = (Scalar**)(HT + size); 725 PetscMemzero(HT,size*sizeof(double)+sizeof(Scalar*)); 726 727 728 /* Loop Over A */ 729 for ( i=0; i<a->mbs; i++ ) { 730 for ( j=ai[i]; j<ai[i+1]; j++ ) { 731 row = i+rstart; 732 col = aj[j]+cstart; 733 734 key = row*baij->Nbs + col + 1; 735 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 736 h1 = HASH1(size,key); 737 738 for ( k=0; k<size; k++ ){ 739 if (HT[(h1+k)%size] == 0.0) { 740 HT[(h1+k)%size] = key; 741 HD[(h1+k)%size] = a->a + j*bs2; 742 break; 743 } else ct++; 744 } 745 if (k> max1) max1 = k; 746 } 747 } 748 /* Loop Over B */ 749 for ( i=0; i<b->mbs; i++ ) { 750 for ( j=bi[i]; j<bi[i+1]; j++ ) { 751 row = i+rstart; 752 col = garray[bj[j]]; 753 key = row*baij->Nbs + col + 1; 754 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 755 h1 = HASH1(size,key); 756 for ( k=0; k<size; k++ ){ 757 if (HT[(h1+k)%size] == 0.0) { 758 HT[(h1+k)%size] = key; 759 HD[(h1+k)%size] = b->a + j*bs2; 760 break; 761 } else ct++; 762 } 763 if (k> max2) max2 = k; 764 } 765 } 766 767 /* Print Summary */ 768 for ( i=0,key=0.0,j=0; i<size; i++) 769 if (HT[i]) {j++;} 770 771 /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 772 PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 773 fflush(stdout); 774 PetscFunctionReturn(0); 775 } 776 777 #undef __FUNC__ 778 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 779 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 780 { 781 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 782 MPI_Status *send_status,recv_status; 783 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 784 int bs=baij->bs,row,col,other_disassembled,flg; 785 Scalar *values,val; 786 InsertMode addv = mat->insertmode; 787 788 PetscFunctionBegin; 789 /* wait on receives */ 790 while (count) { 791 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 792 /* unpack receives into our local space */ 793 values = baij->rvalues + 3*imdex*baij->rmax; 794 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 795 n = n/3; 796 for ( i=0; i<n; i++ ) { 797 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 798 col = (int) PetscReal(values[3*i+1]); 799 val = values[3*i+2]; 800 if (col >= baij->cstart*bs && col < baij->cend*bs) { 801 col -= baij->cstart*bs; 802 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 803 } else { 804 if (mat->was_assembled) { 805 if (!baij->colmap) { 806 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 807 } 808 col = (baij->colmap[col/bs]) - 1 + col%bs; 809 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 810 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 811 col = (int) PetscReal(values[3*i+1]); 812 } 813 } 814 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 815 } 816 } 817 count--; 818 } 819 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 820 821 /* wait on sends */ 822 if (baij->nsends) { 823 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 824 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 825 PetscFree(send_status); 826 } 827 PetscFree(baij->send_waits); PetscFree(baij->svalues); 828 829 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 831 832 /* determine if any processor 
has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

  ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
  if (flg && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    double fact = 1.39;
    ierr = CreateHashTable(mat,fact); CHKERRQ(ierr);
    mat->ops.setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,rank,bs = baij->bs;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm,"  block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
            baij->cstart*bs,baij->cend*bs);
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
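    /* flush here so that, within the sequential phase, each processor's
       output appears in rank order */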
PetscSequentialPhaseEnd(mat->comm,1); 923 } else { 924 int size = baij->size; 925 rank = baij->rank; 926 if (size == 1) { 927 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 928 } else { 929 /* assemble the entire matrix onto first processor. */ 930 Mat A; 931 Mat_SeqBAIJ *Aloc; 932 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 933 int mbs=baij->mbs; 934 Scalar *a; 935 936 if (!rank) { 937 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 938 } else { 939 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 940 } 941 PLogObjectParent(mat,A); 942 943 /* copy over the A part */ 944 Aloc = (Mat_SeqBAIJ*) baij->A->data; 945 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 946 row = baij->rstart; 947 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 948 949 for ( i=0; i<mbs; i++ ) { 950 rvals[0] = bs*(baij->rstart + i); 951 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 952 for ( j=ai[i]; j<ai[i+1]; j++ ) { 953 col = (baij->cstart+aj[j])*bs; 954 for (k=0; k<bs; k++ ) { 955 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 956 col++; a += bs; 957 } 958 } 959 } 960 /* copy over the B part */ 961 Aloc = (Mat_SeqBAIJ*) baij->B->data; 962 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 963 row = baij->rstart*bs; 964 for ( i=0; i<mbs; i++ ) { 965 rvals[0] = bs*(baij->rstart + i); 966 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 967 for ( j=ai[i]; j<ai[i+1]; j++ ) { 968 col = baij->garray[aj[j]]*bs; 969 for (k=0; k<bs; k++ ) { 970 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 971 col++; a += bs; 972 } 973 } 974 } 975 PetscFree(rvals); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the Draw object 981 */ 982 if (!rank || vtype == DRAW_VIEWER) { 983 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 984 } 985 ierr = MatDestroy(A); CHKERRQ(ierr); 986 } 987 } 988 PetscFunctionReturn(0); 989 } 990 991 992 993 #undef __FUNC__ 994 #define __FUNC__ "MatView_MPIBAIJ" 995 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 996 { 997 Mat mat = (Mat) obj; 998 int ierr; 999 ViewerType vtype; 1000 1001 PetscFunctionBegin; 1002 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1003 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1004 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1005 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1006 } else if (vtype == BINARY_FILE_VIEWER) { 1007 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNC__ 1013 #define __FUNC__ "MatDestroy_MPIBAIJ" 1014 int MatDestroy_MPIBAIJ(PetscObject obj) 1015 { 1016 Mat mat = (Mat) obj; 1017 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1018 int ierr; 1019 1020 PetscFunctionBegin; 1021 #if defined(USE_PETSC_LOG) 1022 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 1023 #endif 1024 1025 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1026 PetscFree(baij->rowners); 1027 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1028 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1029 if (baij->colmap) PetscFree(baij->colmap); 1030 if (baij->garray) PetscFree(baij->garray); 1031 if (baij->lvec) VecDestroy(baij->lvec); 1032 if (baij->Mvctx) 
VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->ht) PetscFree(baij->ht);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr, nt;

  PetscFunctionBegin;
  VecGetLocalSize_Fast(xx,nt);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  VecGetLocalSize_Fast(yy,nt);
  if (nt != a->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIBAIJ"
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
1207 */ 1208 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1209 int max = 1,mbs = mat->mbs,tmp; 1210 for ( i=0; i<mbs; i++ ) { 1211 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1212 if (max < tmp) { max = tmp; } 1213 } 1214 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1215 CHKPTRQ(mat->rowvalues); 1216 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1217 } 1218 1219 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1220 lrow = row - brstart; 1221 1222 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1223 if (!v) {pvA = 0; pvB = 0;} 1224 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1225 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1226 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1227 nztot = nzA + nzB; 1228 1229 cmap = mat->garray; 1230 if (v || idx) { 1231 if (nztot) { 1232 /* Sort by increasing column numbers, assuming A and B already sorted */ 1233 int imark = -1; 1234 if (v) { 1235 *v = v_p = mat->rowvalues; 1236 for ( i=0; i<nzB; i++ ) { 1237 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1238 else break; 1239 } 1240 imark = i; 1241 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1242 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1243 } 1244 if (idx) { 1245 *idx = idx_p = mat->rowindices; 1246 if (imark > -1) { 1247 for ( i=0; i<imark; i++ ) { 1248 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1249 } 1250 } else { 1251 for ( i=0; i<nzB; i++ ) { 1252 if (cmap[cworkB[i]/bs] < cstart) 1253 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1254 else break; 1255 } 1256 imark = i; 1257 } 1258 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1259 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1260 } 1261 } else { 1262 if (idx) *idx = 0; 1263 if (v) *v = 0; 1264 } 1265 } 1266 *nz = nztot; 1267 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1268 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNC__ 1273 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1274 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1275 { 1276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1277 1278 PetscFunctionBegin; 1279 if (baij->getrowactive == PETSC_FALSE) { 1280 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1281 } 1282 baij->getrowactive = PETSC_FALSE; 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNC__ 1287 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1288 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1289 { 1290 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1291 1292 PetscFunctionBegin; 1293 *bs = baij->bs; 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNC__ 1298 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1299 int MatZeroEntries_MPIBAIJ(Mat A) 1300 { 1301 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1302 int ierr; 1303 1304 PetscFunctionBegin; 1305 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1306 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNC__ 1311 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1312 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1313 { 1314 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1315 Mat A = a->A, B = a->B; 1316 int ierr; 1317 double isend[5], irecv[5]; 1318 1319 PetscFunctionBegin; 1320 info->block_size = (double)a->bs; 1321 ierr = 
MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->rows_global       = (double)a->M;
  info->columns_global    = (double)a->N;
  info->rows_local        = (double)a->m;
  info->columns_local     = (double)a->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  Scalar      *a;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*) baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  row = baij->rstart;
  rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
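    /* expand block row i into its bs consecutive global row indices */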
for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1420 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1421 col = (baij->cstart+aj[j])*bs; 1422 for (k=0; k<bs; k++ ) { 1423 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1424 col++; a += bs; 1425 } 1426 } 1427 } 1428 /* copy over the B part */ 1429 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1430 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1431 row = baij->rstart*bs; 1432 for ( i=0; i<mbs; i++ ) { 1433 rvals[0] = bs*(baij->rstart + i); 1434 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1435 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1436 col = baij->garray[aj[j]]*bs; 1437 for (k=0; k<bs; k++ ) { 1438 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1439 col++; a += bs; 1440 } 1441 } 1442 } 1443 PetscFree(rvals); 1444 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1445 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1446 1447 if (matout != PETSC_NULL) { 1448 *matout = B; 1449 } else { 1450 /* This isn't really an in-place transpose .... but free data structures from baij */ 1451 PetscFree(baij->rowners); 1452 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1453 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1454 if (baij->colmap) PetscFree(baij->colmap); 1455 if (baij->garray) PetscFree(baij->garray); 1456 if (baij->lvec) VecDestroy(baij->lvec); 1457 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1458 PetscFree(baij); 1459 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1460 PetscHeaderDestroy(B); 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNC__ 1466 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1467 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1468 { 1469 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1470 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1471 int ierr,s1,s2,s3; 1472 1473 PetscFunctionBegin; 1474 if (ll) { 1475 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1476 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1477 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1478 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1479 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1480 } 1481 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNC__ 1486 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1487 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1488 { 1489 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1490 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1491 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1492 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1493 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1494 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1495 MPI_Comm comm = A->comm; 1496 MPI_Request *send_waits,*recv_waits; 1497 MPI_Status recv_status,*send_status; 1498 IS istmp; 1499 1500 PetscFunctionBegin; 1501 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1502 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1503 1504 /* first count number of contributors to each processor */ 1505 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1506 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1507 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1508 for ( i=0; i<N; i++ ) { 1509 idx = rows[i]; 1510 found = 0; 1511 for ( j=0; j<size; j++ ) { 1512 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1513 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1514 } 1515 } 1516 if (!found) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1517 } 1518 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1519 1520 /* inform other processors of number of messages and max length*/ 1521 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1522 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1523 nrecvs = work[rank]; 1524 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1525 nmax = work[rank]; 1526 PetscFree(work); 1527 1528 /* post receives: */ 1529 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1530 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1531 for ( i=0; i<nrecvs; i++ ) { 1532 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1533 } 1534 1535 /* do sends: 1536 1) starts[i] gives the starting index in svalues for stuff going to 1537 the ith processor 1538 */ 1539 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1540 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1541 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1542 starts[0] = 0; 1543 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1544 for ( i=0; i<N; i++ ) { 1545 svalues[starts[owner[i]]++] = rows[i]; 1546 } 1547 ISRestoreIndices(is,&rows); 1548 1549 starts[0] = 0; 1550 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1551 count = 0; 1552 for ( i=0; i<size; i++ ) { 1553 if (procs[i]) { 1554 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1555 } 1556 } 1557 PetscFree(starts); 1558 1559 base = owners[rank]*bs; 1560 1561 /* wait on receives */ 1562 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1563 source = lens + nrecvs; 1564 count = nrecvs; slen = 0; 1565 while (count) { 1566 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1567 /* unpack receives into our local space */ 1568 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1569 source[imdex] = recv_status.MPI_SOURCE; 1570 lens[imdex] = n; 1571 slen += n; 1572 count--; 1573 } 1574 PetscFree(recv_waits); 1575 1576 /* move the data into the send scatter */ 1577 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1578 count = 0; 1579 for ( i=0; i<nrecvs; i++ ) { 1580 values = rvalues + i*nmax; 1581 for ( j=0; j<lens[i]; j++ ) { 1582 lrows[count++] = values[j] - base; 1583 } 1584 } 1585 PetscFree(rvalues); PetscFree(lens); 1586 PetscFree(owner); PetscFree(nprocs); 1587 1588 /* actually zap the local rows */ 1589 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1590 PLogObjectParent(A,istmp); 1591 1592 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1593 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1594 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1595 1596 if (diag) { 1597 for ( i = 0; i < slen; i++ ) { 1598 row = lrows[i] + rstart_bs; 1599 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1600 } 1601 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1602 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1603 } 1604 PetscFree(lrows); 1605 1606 /* wait on sends */ 1607 if (nsends) { 1608 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1609 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 
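    /* all sends have completed, so the status array and the send buffer
       may now be freed */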
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
extern int MatPrintHelp_SeqBAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.  You must leave room for the diagonal
           entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
.
.  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$             0 1 2 3 4 5 6 7 8 9 10 11
$            -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$            -------------------

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
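
   Example:
   The following fragment is an illustration only (the sizes here are
   assumed for the example, not taken from any particular application).
   It creates a 20-by-20 matrix with block size 2, distributed across the
   processors of PETSC_COMM_WORLD, preallocating 5 diagonal and 2
   off-diagonal block nonzeros per block row:

$      Mat A;
$      int ierr;
$      ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,
$                              20,20,5,PETSC_NULL,2,PETSC_NULL,&A);
$      /* ... set entries with MatSetValues(), then assemble ... */
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);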

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));

  B->destroy    = MatDestroy_MPIBAIJ;
  B->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* if both local and global sizes are given, the local ones take
     precedence and the global sizes are recomputed below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs     = m/bs; nbs = n/bs;
    ierr    = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend   * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend   * bs;

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries formed during assembly */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray      = 0;

  /* stuff used for matrix-vector multiply */
  b->lvec        = 0;
  b->Mvctx       = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht      = 0;
  b->ht_size = 0;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIBAIJ;
  mat->view      = MatView_MPIBAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend   = oldmat->cend;
  a->size   = oldmat->size;
  a->rank   = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;
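
  /* the parallel communication pattern (local work vector and scatter
     context) is duplicated from the old matrix rather than rebuilt */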
  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*buf;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int        *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int        flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
  int        *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int        dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
  bs2  = bs*bs;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr     = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
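    /* the counts and displacements above are in point rows (bs rows per
       block row), matching the layout of locrowlens on each processor */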
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j<rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the parts for all the other processors (except the last)
       and ship them off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  dlens   = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens  = dlens + (rend-rstart);
  mask    = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ )  mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
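    /* buf is sized for the largest per-processor chunk (maxnz), so the
       same buffer can be reused for every outgoing message below */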
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the parts for all the other processors (except the last one)
       and ship them out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
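
/*
   Usage sketch for the loader above -- an illustration only, not part of
   the library.  It assumes the 2.0-era binary viewer interface
   (ViewerFileOpenBinary() with BINARY_RDONLY); the file name "matrix.dat"
   is a placeholder.  The block size used while loading can be selected
   on the command line with -matload_block_size <bs>.

     Viewer viewer;
     Mat    A;
     int    ierr;

     ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);
     ierr = ViewerDestroy(viewer);
*/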