1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.95 1998/01/12 17:17:31 balay Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 377 378 #undef __FUNC__ 379 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 380 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 381 { 382 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 383 int ierr,i,j,row,col; 384 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 385 int rend_orig=baij->rend_bs; 386 387 int h1,key,size=baij->ht_size,k,bs=baij->bs; 388 double * HT = baij->ht; 389 Scalar ** HD = (Scalar **)(HT + size),value; 390 391 392 PetscFunctionBegin; 393 394 if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 if (im[i] >= rstart_orig && im[i] < rend_orig) { 402 row = im[i]; 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*baij->Nbs+(col/bs)+1; 408 h1 = HASH1(size,key); 409 410 for ( k=0; k<size; k++ ){ 411 if (HT[(h1+k)%size] == key) { 412 if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value; 413 else *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) = value; 414 break; 415 } 416 } 417 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found"); 418 } 419 } else { 420 if (roworiented && !baij->donotstash) { 421 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 422 } else { 423 if (!baij->donotstash) { 424 row = im[i]; 425 for ( j=0; j<n; j++ ) { 426 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 427 } 428 } 429 } 430 } 431 } 432 PetscFunctionReturn(0); 433 } 434 435 #undef __FUNC__ 436 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 437 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 438 { 439 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 440 int ierr,i,j,ii,jj,row,col,k,l; 441 int roworiented = baij->roworiented,rstart=baij->rstart ; 442 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 443 444 int h1,key,size=baij->ht_size; 445 double * HT = baij->ht; 446 Scalar ** HD = (Scalar **)(HT + size),*value,*baij_a; 447 448 PetscFunctionBegin; 449 450 if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 451 452 if (roworiented) { 453 stepval = (n-1)*bs; 454 } else { 455 stepval = (m-1)*bs; 456 } 457 for ( i=0; i<m; i++ ) { 458 #if defined(USE_PETSC_BOPT_g) 459 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 460 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 461 #endif 462 if (im[i] >= rstart && im[i] < rend) { 463 row = im[i]; 464 for ( j=0; j<n; j++ ) { 465 col = in[j]; 466 467 /* Look up into the Hash Table */ 468 key = row*baij->Nbs+col+1; 469 h1 = HASH1(size,key); 470 471 for ( k=0; k<size; k++ ){ 472 if (HT[(h1+k)%size] == key) { 473 baij_a = HD[(h1+k)%size]; 474 475 if (roworiented) { 476 value = v + i*(stepval+bs)*bs + j*bs; 477 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 478 for ( jj=0; jj<bs2; jj+=bs ) { 479 if (addv == ADD_VALUES) baij_a[jj] += *value++; 480 else baij_a[jj] = *value++; 481 } 482 } 483 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 487 for ( jj=0; jj<bs; jj++ ) { 488 if (addv == ADD_VALUES) baij_a[jj] += *value++; 489 else baij_a[jj] = *value++; 490 } 491 } 492 } 493 break; 494 } 495 } 496 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found"); 497 } 498 } else { 499 if (!baij->donotstash) { 500 if (roworiented ) { 501 row = im[i]*bs; 502 value = v + i*(stepval+bs)*bs; 503 for ( j=0; j<bs; j++,row++ ) { 504 for ( k=0; k<n; k++ ) { 505 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 506 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 507 } 508 } 509 } 510 } else { 511 for ( j=0; j<n; j++ ) { 512 value = v + j*(stepval+bs)*bs + i*bs; 513 col = in[j]*bs; 514 for ( k=0; k<bs; k++,col++,value+=stepval) { 515 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 516 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 517 } 518 } 519 } 520 } 521 } 522 } 523 } 524 PetscFunctionReturn(0); 525 } 526 #undef __FUNC__ 527 #define __FUNC__ "MatGetValues_MPIBAIJ" 528 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 529 { 530 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 531 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 532 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 533 534 for ( i=0; i<m; i++ ) { 535 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 536 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 537 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 538 row = idxm[i] - bsrstart; 539 for ( j=0; j<n; j++ ) { 540 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 541 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 542 if (idxn[j] >= bscstart && idxn[j] < bscend){ 543 col = idxn[j] - bscstart; 544 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 545 } else { 546 if (!baij->colmap) { 547 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 548 } 549 if((baij->colmap[idxn[j]/bs]-1 < 0) || 550 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 551 else { 552 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 553 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 554 } 555 } 556 } 557 } else { 558 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 559 } 560 } 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNC__ 565 #define __FUNC__ "MatNorm_MPIBAIJ" 566 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 567 { 568 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 569 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 570 int ierr, i,bs2=baij->bs2; 571 double sum = 0.0; 572 Scalar *v; 573 574 PetscFunctionBegin; 575 if (baij->size == 1) { 576 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 577 } else { 578 if (type == NORM_FROBENIUS) { 579 v = amat->a; 580 for (i=0; i<amat->nz*bs2; i++ ) { 581 #if defined(USE_PETSC_COMPLEX) 582 sum += real(conj(*v)*(*v)); v++; 583 #else 584 sum += (*v)*(*v); v++; 585 #endif 586 } 587 v = bmat->a; 588 for (i=0; i<bmat->nz*bs2; i++ ) { 589 #if defined(USE_PETSC_COMPLEX) 590 sum += real(conj(*v)*(*v)); v++; 591 #else 592 sum += (*v)*(*v); v++; 593 #endif 594 } 595 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 596 *norm = sqrt(*norm); 597 } else { 598 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNC__ 605 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 606 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 607 { 608 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 609 MPI_Comm comm = mat->comm; 610 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 611 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 612 MPI_Request *send_waits,*recv_waits; 613 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 614 InsertMode addv; 615 Scalar *rvalues,*svalues; 616 617 PetscFunctionBegin; 618 /* make sure all processors are either in INSERTMODE or ADDMODE */ 619 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 620 if (addv == (ADD_VALUES|INSERT_VALUES)) { 621 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 622 } 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 /* first count number of contributors to each processor */ 626 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 627 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 628 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 629 for ( i=0; i<baij->stash.n; i++ ) { 630 idx = baij->stash.idx[i]; 631 for ( j=0; j<size; j++ ) { 632 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 633 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 634 } 635 } 636 } 637 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 638 639 /* inform other processors of number of messages and max length*/ 640 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 641 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 642 nreceives = work[rank]; 643 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 644 nmax = work[rank]; 645 PetscFree(work); 646 647 /* post receives: 648 1) each message will consist of ordered pairs 649 (global index,value) we store the global index as a double 650 to simplify the message passing. 651 2) since we don't know how long each individual message is we 652 allocate the largest needed buffer for each receive. Potentially 653 this is a lot of wasted space. 654 655 656 This could be done better. 657 */ 658 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 659 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 660 for ( i=0; i<nreceives; i++ ) { 661 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 662 } 663 664 /* do sends: 665 1) starts[i] gives the starting index in svalues for stuff going to 666 the ith processor 667 */ 668 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 669 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 670 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 671 starts[0] = 0; 672 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 673 for ( i=0; i<baij->stash.n; i++ ) { 674 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 675 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 676 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 677 } 678 PetscFree(owner); 679 starts[0] = 0; 680 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 681 count = 0; 682 for ( i=0; i<size; i++ ) { 683 if (procs[i]) { 684 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 685 } 686 } 687 PetscFree(starts); PetscFree(nprocs); 688 689 /* Free cache space */ 690 PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 691 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 692 693 baij->svalues = svalues; baij->rvalues = rvalues; 694 baij->nsends = nsends; baij->nrecvs = nreceives; 695 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 696 baij->rmax = nmax; 697 698 PetscFunctionReturn(0); 699 } 700 701 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 702 { 703 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 704 Mat A = baij->A, B=baij->B; 705 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 706 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 707 int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 708 int cstart=baij->cstart,*garray=baij->garray,row,col; 709 double *HT,key; 710 extern int PetscGlobalRank; 711 Scalar **HD; 712 /* 713 Scalar *aa=a->a,*ba=b->a; 714 static double *HT; 715 static int flag=1; 716 */ 717 PetscFunctionBegin; 718 baij->ht_size=(int)(factor*nz); 719 size = baij->ht_size; 720 /* Allocate Memory for Hash Table */ 721 if (baij->ht) { 722 PetscFunctionReturn(0); 723 } 724 725 baij->ht = (double*)PetscMalloc(size*(sizeof(double)+sizeof(Scalar*))+1); CHKPTRQ(baij->ht); 726 HT = baij->ht; 727 HD = (Scalar**)(HT + size); 728 PetscMemzero(HT,size*sizeof(double)+sizeof(Scalar*)); 729 730 731 /* Loop Over A */ 732 for ( i=0; i<a->mbs; i++ ) { 733 for ( j=ai[i]; j<ai[i+1]; j++ ) { 734 row = i+rstart; 735 col = aj[j]+cstart; 736 737 key = row*baij->Nbs + col + 1; 738 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 739 h1 = HASH1(size,key); 740 741 for ( k=0; k<size; k++ ){ 742 if (HT[(h1+k)%size] == 0.0) { 743 HT[(h1+k)%size] = key; 744 HD[(h1+k)%size] = a->a + j*bs2; 745 break; 746 } else ct++; 747 } 748 if (k> max1) max1 = k; 749 } 750 } 751 /* Loop Over B */ 752 for ( i=0; i<b->mbs; i++ ) { 753 for ( j=bi[i]; j<bi[i+1]; j++ ) { 754 row = i+rstart; 755 col = garray[bj[j]]; 756 key = row*baij->Nbs + col + 1; 757 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 758 h1 = HASH1(size,key); 759 for ( k=0; k<size; k++ ){ 760 if (HT[(h1+k)%size] == 0.0) { 761 HT[(h1+k)%size] = key; 762 HD[(h1+k)%size] = b->a + j*bs2; 763 break; 764 } else ct++; 765 } 766 if (k> max2) max2 = k; 767 } 768 } 769 770 /* Print Summary */ 771 for ( i=0,key=0.0,j=0; i<size; i++) 772 if (HT[i]) {j++;} 773 774 /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 775 PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 776 fflush(stdout); 777 PetscFunctionReturn(0); 778 } 779 780 #undef __FUNC__ 781 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 782 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 783 { 784 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 785 MPI_Status *send_status,recv_status; 786 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 787 int bs=baij->bs,row,col,other_disassembled,flg; 788 Scalar *values,val; 789 InsertMode addv = mat->insertmode; 790 791 PetscFunctionBegin; 792 /* wait on receives */ 793 while (count) { 794 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 795 /* unpack receives into our local space */ 796 values = baij->rvalues + 3*imdex*baij->rmax; 797 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 798 n = n/3; 799 for ( i=0; i<n; i++ ) { 800 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 801 col = (int) PetscReal(values[3*i+1]); 802 val = values[3*i+2]; 803 if (col >= baij->cstart*bs && col < baij->cend*bs) { 804 col -= baij->cstart*bs; 805 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 806 } else { 807 if (mat->was_assembled) { 808 if (!baij->colmap) { 809 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 810 } 811 col = (baij->colmap[col/bs]) - 1 + col%bs; 812 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 813 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 814 col = (int) PetscReal(values[3*i+1]); 815 } 816 } 817 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 818 } 819 } 820 count--; 821 } 822 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 823 824 /* wait on sends */ 825 if (baij->nsends) { 826 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 827 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 828 PetscFree(send_status); 829 } 830 PetscFree(baij->send_waits); PetscFree(baij->svalues); 831 832 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 833 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 834 835 /* determine if any processor has disassembled, if so we must 836 also disassemble ourselfs, in order that we may reassemble. */ 837 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 838 if (mat->was_assembled && !other_disassembled) { 839 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 840 } 841 842 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 843 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 844 } 845 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 846 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 847 848 ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 849 if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) { 850 double fact = 1.39; 851 MatCreateHashTable_MPIBAIJ_Private(mat,fact); 852 mat->ops.setvalues = MatSetValues_MPIBAIJ_HT; 853 mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 854 } 855 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNC__ 860 #define __FUNC__ "MatView_MPIBAIJ_Binary" 861 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 862 { 863 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 864 int ierr; 865 866 PetscFunctionBegin; 867 if (baij->size == 1) { 868 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 869 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNC__ 874 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 875 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 876 { 877 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 878 int ierr, format,rank,bs = baij->bs; 879 FILE *fd; 880 ViewerType vtype; 881 882 PetscFunctionBegin; 883 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 884 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 885 ierr = ViewerGetFormat(viewer,&format); 886 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 887 MatInfo info; 888 MPI_Comm_rank(mat->comm,&rank); 889 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 890 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 891 PetscSequentialPhaseBegin(mat->comm,1); 892 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 893 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 894 baij->bs,(int)info.memory); 895 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 896 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 897 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 898 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 899 fflush(fd); 900 PetscSequentialPhaseEnd(mat->comm,1); 901 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 904 PetscPrintf(mat->comm," block size is %d\n",bs); 905 PetscFunctionReturn(0); 906 } 907 } 908 909 if (vtype == DRAW_VIEWER) { 910 Draw draw; 911 PetscTruth isnull; 912 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 913 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 914 } 915 916 if (vtype == ASCII_FILE_VIEWER) { 917 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 918 PetscSequentialPhaseBegin(mat->comm,1); 919 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 920 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 921 baij->cstart*bs,baij->cend*bs); 922 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 923 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 924 fflush(fd); 925 PetscSequentialPhaseEnd(mat->comm,1); 926 } else { 927 int size = baij->size; 928 rank = baij->rank; 929 if (size == 1) { 930 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 931 } else { 932 /* assemble the entire matrix onto first processor. */ 933 Mat A; 934 Mat_SeqBAIJ *Aloc; 935 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 936 int mbs=baij->mbs; 937 Scalar *a; 938 939 if (!rank) { 940 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 941 } else { 942 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 943 } 944 PLogObjectParent(mat,A); 945 946 /* copy over the A part */ 947 Aloc = (Mat_SeqBAIJ*) baij->A->data; 948 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 949 row = baij->rstart; 950 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 951 952 for ( i=0; i<mbs; i++ ) { 953 rvals[0] = bs*(baij->rstart + i); 954 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 955 for ( j=ai[i]; j<ai[i+1]; j++ ) { 956 col = (baij->cstart+aj[j])*bs; 957 for (k=0; k<bs; k++ ) { 958 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 959 col++; a += bs; 960 } 961 } 962 } 963 /* copy over the B part */ 964 Aloc = (Mat_SeqBAIJ*) baij->B->data; 965 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = baij->rstart*bs; 967 for ( i=0; i<mbs; i++ ) { 968 rvals[0] = bs*(baij->rstart + i); 969 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 970 for ( j=ai[i]; j<ai[i+1]; j++ ) { 971 col = baij->garray[aj[j]]*bs; 972 for (k=0; k<bs; k++ ) { 973 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 974 col++; a += bs; 975 } 976 } 977 } 978 PetscFree(rvals); 979 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 981 /* 982 Everyone has to call to draw the matrix since the graphics waits are 983 synchronized across all processors that share the Draw object 984 */ 985 if (!rank || vtype == DRAW_VIEWER) { 986 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 987 } 988 ierr = MatDestroy(A); CHKERRQ(ierr); 989 } 990 } 991 PetscFunctionReturn(0); 992 } 993 994 995 996 #undef __FUNC__ 997 #define __FUNC__ "MatView_MPIBAIJ" 998 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 999 { 1000 Mat mat = (Mat) obj; 1001 int ierr; 1002 ViewerType vtype; 1003 1004 PetscFunctionBegin; 1005 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1006 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1007 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1008 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1009 } else if (vtype == BINARY_FILE_VIEWER) { 1010 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1011 } 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNC__ 1016 #define __FUNC__ "MatDestroy_MPIBAIJ" 1017 int MatDestroy_MPIBAIJ(PetscObject obj) 1018 { 1019 Mat mat = (Mat) obj; 1020 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1021 int ierr; 1022 1023 PetscFunctionBegin; 1024 #if defined(USE_PETSC_LOG) 1025 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 1026 #endif 1027 1028 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1029 PetscFree(baij->rowners); 1030 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1031 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1032 if (baij->colmap) PetscFree(baij->colmap); 1033 if (baij->garray) PetscFree(baij->garray); 1034 if (baij->lvec) VecDestroy(baij->lvec); 1035 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1036 if (baij->rowvalues) PetscFree(baij->rowvalues); 1037 if (baij->barray) PetscFree(baij->barray); 1038 if (baij->ht) PetscFree(baij->ht); 1039 PetscFree(baij); 1040 PLogObjectDestroy(mat); 1041 PetscHeaderDestroy(mat); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNC__ 1046 #define __FUNC__ "MatMult_MPIBAIJ" 1047 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1048 { 1049 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1050 int ierr, nt; 1051 1052 PetscFunctionBegin; 1053 VecGetLocalSize_Fast(xx,nt); 1054 if (nt != a->n) { 1055 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1056 } 1057 VecGetLocalSize_Fast(yy,nt); 1058 if (nt != a->m) { 1059 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1060 } 1061 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1062 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 1063 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1064 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1065 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNC__ 1070 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1071 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1074 int ierr; 1075 1076 PetscFunctionBegin; 1077 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1078 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1079 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1080 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNC__ 1085 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1086 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1087 { 1088 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1089 int ierr; 1090 1091 PetscFunctionBegin; 1092 /* do nondiagonal part */ 1093 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1094 /* send it on its way */ 1095 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1096 /* do local part */ 1097 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1098 /* receive remote parts: note this assumes the values are not actually */ 1099 /* inserted in yy until the next line, which is true for my implementation*/ 1100 /* but is not perhaps always true. */ 1101 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNC__ 1106 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1107 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1108 { 1109 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1110 int ierr; 1111 1112 PetscFunctionBegin; 1113 /* do nondiagonal part */ 1114 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1115 /* send it on its way */ 1116 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1117 /* do local part */ 1118 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1119 /* receive remote parts: note this assumes the values are not actually */ 1120 /* inserted in yy until the next line, which is true for my implementation*/ 1121 /* but is not perhaps always true. */ 1122 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 /* 1127 This only works correctly for square matrices where the subblock A->A is the 1128 diagonal block 1129 */ 1130 #undef __FUNC__ 1131 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1132 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1133 { 1134 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1135 int ierr; 1136 1137 PetscFunctionBegin; 1138 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1139 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNC__ 1144 #define __FUNC__ "MatScale_MPIBAIJ" 1145 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1146 { 1147 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1148 int ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1152 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNC__ 1157 #define __FUNC__ "MatGetSize_MPIBAIJ" 1158 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1159 { 1160 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1161 1162 PetscFunctionBegin; 1163 if (m) *m = mat->M; 1164 if (n) *n = mat->N; 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNC__ 1169 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1170 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1171 { 1172 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1173 1174 PetscFunctionBegin; 1175 *m = mat->m; *n = mat->N; 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNC__ 1180 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1181 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1182 { 1183 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1184 1185 PetscFunctionBegin; 1186 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1187 PetscFunctionReturn(0); 1188 } 1189 1190 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1191 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1192 1193 #undef __FUNC__ 1194 #define __FUNC__ "MatGetRow_MPIBAIJ" 1195 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1196 { 1197 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1198 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1199 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1200 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1201 int *cmap, *idx_p,cstart = mat->cstart; 1202 1203 PetscFunctionBegin; 1204 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1205 mat->getrowactive = PETSC_TRUE; 1206 1207 if (!mat->rowvalues && (idx || v)) { 1208 /* 1209 allocate enough space to hold information from the longest row. 1210 */ 1211 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1212 int max = 1,mbs = mat->mbs,tmp; 1213 for ( i=0; i<mbs; i++ ) { 1214 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1215 if (max < tmp) { max = tmp; } 1216 } 1217 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1218 CHKPTRQ(mat->rowvalues); 1219 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1220 } 1221 1222 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1223 lrow = row - brstart; 1224 1225 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1226 if (!v) {pvA = 0; pvB = 0;} 1227 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1228 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1229 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1230 nztot = nzA + nzB; 1231 1232 cmap = mat->garray; 1233 if (v || idx) { 1234 if (nztot) { 1235 /* Sort by increasing column numbers, assuming A and B already sorted */ 1236 int imark = -1; 1237 if (v) { 1238 *v = v_p = mat->rowvalues; 1239 for ( i=0; i<nzB; i++ ) { 1240 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1241 else break; 1242 } 1243 imark = i; 1244 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1245 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1246 } 1247 if (idx) { 1248 *idx = idx_p = mat->rowindices; 1249 if (imark > -1) { 1250 for ( i=0; i<imark; i++ ) { 1251 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1252 } 1253 } else { 1254 for ( i=0; i<nzB; i++ ) { 1255 if (cmap[cworkB[i]/bs] < cstart) 1256 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1257 else break; 1258 } 1259 imark = i; 1260 } 1261 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1262 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1263 } 1264 } else { 1265 if (idx) *idx = 0; 1266 if (v) *v = 0; 1267 } 1268 } 1269 *nz = nztot; 1270 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1271 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNC__ 1276 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1277 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1278 { 1279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1280 1281 PetscFunctionBegin; 1282 if (baij->getrowactive == PETSC_FALSE) { 1283 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1284 } 1285 baij->getrowactive = PETSC_FALSE; 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNC__ 1290 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1291 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1292 { 1293 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1294 1295 PetscFunctionBegin; 1296 *bs = baij->bs; 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNC__ 1301 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1302 int MatZeroEntries_MPIBAIJ(Mat A) 1303 { 1304 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1305 int ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1309 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNC__ 1314 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1315 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1316 { 1317 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1318 Mat A = a->A, B = a->B; 1319 int ierr; 1320 double isend[5], irecv[5]; 1321 1322 PetscFunctionBegin; 1323 info->block_size = (double)a->bs; 1324 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1325 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1326 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1327 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1328 if (flag == MAT_LOCAL) { 1329 info->nz_used = isend[0]; 1330 info->nz_allocated = isend[1]; 1331 info->nz_unneeded = isend[2]; 1332 info->memory = isend[3]; 1333 info->mallocs = isend[4]; 1334 } else if (flag == MAT_GLOBAL_MAX) { 1335 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1336 info->nz_used = irecv[0]; 1337 info->nz_allocated = irecv[1]; 1338 info->nz_unneeded = irecv[2]; 1339 info->memory = irecv[3]; 1340 info->mallocs = irecv[4]; 1341 } else if (flag == MAT_GLOBAL_SUM) { 1342 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1343 info->nz_used = irecv[0]; 1344 info->nz_allocated = irecv[1]; 1345 info->nz_unneeded = irecv[2]; 1346 info->memory = irecv[3]; 1347 info->mallocs = irecv[4]; 1348 } 1349 info->rows_global = (double)a->M; 1350 info->columns_global = (double)a->N; 1351 info->rows_local = (double)a->m; 1352 info->columns_local = (double)a->N; 1353 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1354 info->fill_ratio_needed = 0; 1355 info->factor_mallocs = 0; 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNC__ 1360 #define __FUNC__ "MatSetOption_MPIBAIJ" 1361 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1362 { 1363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1364 1365 PetscFunctionBegin; 1366 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1367 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1368 op == MAT_COLUMNS_UNSORTED || 1369 op == MAT_COLUMNS_SORTED || 1370 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1371 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1372 MatSetOption(a->A,op); 1373 MatSetOption(a->B,op); 1374 } else if (op == MAT_ROW_ORIENTED) { 1375 a->roworiented = 1; 1376 MatSetOption(a->A,op); 1377 MatSetOption(a->B,op); 1378 } else if (op == MAT_ROWS_SORTED || 1379 op == MAT_ROWS_UNSORTED || 1380 op == MAT_SYMMETRIC || 1381 op == MAT_STRUCTURALLY_SYMMETRIC || 1382 op == MAT_YES_NEW_DIAGONALS) 1383 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1384 else if (op == MAT_COLUMN_ORIENTED) { 1385 a->roworiented = 0; 1386 MatSetOption(a->A,op); 1387 MatSetOption(a->B,op); 1388 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1389 a->donotstash = 1; 1390 } else if (op == MAT_NO_NEW_DIAGONALS) { 1391 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1392 } else { 1393 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 #undef __FUNC__ 1399 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1400 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1401 { 1402 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1403 Mat_SeqBAIJ *Aloc; 1404 Mat B; 1405 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1406 int bs=baij->bs,mbs=baij->mbs; 1407 Scalar *a; 1408 1409 PetscFunctionBegin; 1410 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1411 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1412 CHKERRQ(ierr); 1413 1414 /* copy over the A part */ 1415 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1416 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1417 row = baij->rstart; 1418 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1419 1420 for ( i=0; i<mbs; i++ ) { 1421 rvals[0] = bs*(baij->rstart + i); 1422 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1423 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1424 col = (baij->cstart+aj[j])*bs; 1425 for (k=0; k<bs; k++ ) { 1426 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1427 col++; a += bs; 1428 } 1429 } 1430 } 1431 /* copy over the B part */ 1432 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1433 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1434 row = baij->rstart*bs; 1435 for ( i=0; i<mbs; i++ ) { 1436 rvals[0] = bs*(baij->rstart + i); 1437 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1438 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1439 col = baij->garray[aj[j]]*bs; 1440 for (k=0; k<bs; k++ ) { 1441 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1442 col++; a += bs; 1443 } 1444 } 1445 } 1446 PetscFree(rvals); 1447 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1448 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1449 1450 if (matout != PETSC_NULL) { 1451 *matout = B; 1452 } else { 1453 /* This isn't really an in-place transpose .... but free data structures from baij */ 1454 PetscFree(baij->rowners); 1455 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1456 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1457 if (baij->colmap) PetscFree(baij->colmap); 1458 if (baij->garray) PetscFree(baij->garray); 1459 if (baij->lvec) VecDestroy(baij->lvec); 1460 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1461 PetscFree(baij); 1462 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1463 PetscHeaderDestroy(B); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNC__ 1469 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1470 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1471 { 1472 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1473 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1474 int ierr,s1,s2,s3; 1475 1476 PetscFunctionBegin; 1477 if (ll) { 1478 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1479 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1480 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1481 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1482 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1483 } 1484 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNC__ 1489 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1490 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1491 { 1492 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1493 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1494 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1495 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1496 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1497 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1498 MPI_Comm comm = A->comm; 1499 MPI_Request *send_waits,*recv_waits; 1500 MPI_Status recv_status,*send_status; 1501 IS istmp; 1502 1503 PetscFunctionBegin; 1504 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1505 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1506 1507 /* first count number of contributors to each processor */ 1508 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1509 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1510 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1511 for ( i=0; i<N; i++ ) { 1512 idx = rows[i]; 1513 found = 0; 1514 for ( j=0; j<size; j++ ) { 1515 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1516 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1517 } 1518 } 1519 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1520 } 1521 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1522 1523 /* inform other processors of number of messages and max length*/ 1524 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1525 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1526 nrecvs = work[rank]; 1527 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1528 nmax = work[rank]; 1529 PetscFree(work); 1530 1531 /* post receives: */ 1532 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1533 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1534 for ( i=0; i<nrecvs; i++ ) { 1535 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1536 } 1537 1538 /* do sends: 1539 1) starts[i] gives the starting index in svalues for stuff going to 1540 the ith processor 1541 */ 1542 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1543 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1544 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1545 starts[0] = 0; 1546 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1547 for ( i=0; i<N; i++ ) { 1548 svalues[starts[owner[i]]++] = rows[i]; 1549 } 1550 ISRestoreIndices(is,&rows); 1551 1552 starts[0] = 0; 1553 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1554 count = 0; 1555 for ( i=0; i<size; i++ ) { 1556 if (procs[i]) { 1557 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1558 } 1559 } 1560 PetscFree(starts); 1561 1562 base = owners[rank]*bs; 1563 1564 /* wait on receives */ 1565 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1566 source = lens + nrecvs; 1567 count = nrecvs; slen = 0; 1568 while (count) { 1569 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1570 /* unpack receives into our local space */ 1571 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1572 source[imdex] = recv_status.MPI_SOURCE; 1573 lens[imdex] = n; 1574 slen += n; 1575 count--; 1576 } 1577 PetscFree(recv_waits); 1578 1579 /* move the data into the send scatter */ 1580 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1581 count = 0; 1582 for ( i=0; i<nrecvs; i++ ) { 1583 values = rvalues + i*nmax; 1584 for ( j=0; j<lens[i]; j++ ) { 1585 lrows[count++] = values[j] - base; 1586 } 1587 } 1588 PetscFree(rvalues); PetscFree(lens); 1589 PetscFree(owner); PetscFree(nprocs); 1590 1591 /* actually zap the local rows */ 1592 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1593 PLogObjectParent(A,istmp); 1594 1595 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1596 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1597 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1598 1599 if (diag) { 1600 for ( i = 0; i < slen; i++ ) { 1601 row = lrows[i] + rstart_bs; 1602 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1603 } 1604 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1605 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1606 } 1607 PetscFree(lrows); 1608 1609 /* wait on sends */ 1610 if (nsends) { 1611 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1612 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1613 PetscFree(send_status); 1614 } 1615 PetscFree(send_waits); PetscFree(svalues); 1616 1617 PetscFunctionReturn(0); 1618 } 1619 extern int MatPrintHelp_SeqBAIJ(Mat); 1620 #undef __FUNC__ 1621 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1622 int MatPrintHelp_MPIBAIJ(Mat A) 1623 { 1624 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1625 int ierr; 1626 1627 PetscFunctionBegin; 1628 if (!a->rank) { 1629 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 1634 #undef __FUNC__ 1635 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1636 int MatSetUnfactored_MPIBAIJ(Mat A) 1637 { 1638 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1639 int ierr; 1640 1641 PetscFunctionBegin; 1642 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1643 PetscFunctionReturn(0); 1644 } 1645 1646 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1647 1648 /* -------------------------------------------------------------------*/ 1649 static struct _MatOps MatOps = { 1650 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1651 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1652 0,0,0,0, 1653 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1654 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1655 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1656 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1657 0,0,0,MatGetSize_MPIBAIJ, 1658 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1659 0,0,MatConvertSameType_MPIBAIJ,0,0, 1660 0,0,0,MatGetSubMatrices_MPIBAIJ, 1661 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1662 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1663 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1664 1665 1666 #undef __FUNC__ 1667 #define __FUNC__ "MatCreateMPIBAIJ" 1668 /*@C 1669 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1670 (block compressed row). For good matrix assembly performance 1671 the user should preallocate the matrix storage by setting the parameters 1672 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1673 performance can be increased by more than a factor of 50. 1674 1675 Input Parameters: 1676 . comm - MPI communicator 1677 . bs - size of blockk 1678 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1679 This value should be the same as the local size used in creating the 1680 y vector for the matrix-vector product y = Ax. 1681 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1682 This value should be the same as the local size used in creating the 1683 x vector for the matrix-vector product y = Ax. 1684 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1685 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1686 . d_nz - number of block nonzeros per block row in diagonal portion of local 1687 submatrix (same for all local rows) 1688 . d_nzz - array containing the number of block nonzeros in the various block rows 1689 of the in diagonal portion of the local (possibly different for each block 1690 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1691 it is zero. 1692 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1693 submatrix (same for all local rows). 1694 . o_nzz - array containing the number of nonzeros in the various block rows of the 1695 off-diagonal portion of the local submatrix (possibly different for 1696 each block row) or PETSC_NULL. 1697 1698 Output Parameter: 1699 . A - the matrix 1700 1701 Notes: 1702 The user MUST specify either the local or global matrix dimensions 1703 (possibly both). 1704 1705 Storage Information: 1706 For a square global matrix we define each processor's diagonal portion 1707 to be its local rows and the corresponding columns (a square submatrix); 1708 each processor's off-diagonal portion encompasses the remainder of the 1709 local matrix (a rectangular submatrix). 1710 1711 The user can specify preallocated storage for the diagonal part of 1712 the local submatrix with either d_nz or d_nnz (not both). Set 1713 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1714 memory allocation. Likewise, specify preallocated storage for the 1715 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1716 1717 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1718 the figure below we depict these three local rows and all columns (0-11). 1719 1720 $ 0 1 2 3 4 5 6 7 8 9 10 11 1721 $ ------------------- 1722 $ row 3 | o o o d d d o o o o o o 1723 $ row 4 | o o o d d d o o o o o o 1724 $ row 5 | o o o d d d o o o o o o 1725 $ ------------------- 1726 $ 1727 1728 Thus, any entries in the d locations are stored in the d (diagonal) 1729 submatrix, and any entries in the o locations are stored in the 1730 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1731 stored simply in the MATSEQBAIJ format for compressed row storage. 1732 1733 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1734 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1735 In general, for PDE problems in which most nonzeros are near the diagonal, 1736 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1737 or you will get TERRIBLE performance; see the users' manual chapter on 1738 matrices. 1739 1740 .keywords: matrix, block, aij, compressed row, sparse, parallel 1741 1742 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1743 @*/ 1744 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1745 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1746 { 1747 Mat B; 1748 Mat_MPIBAIJ *b; 1749 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1750 1751 PetscFunctionBegin; 1752 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1753 1754 MPI_Comm_size(comm,&size); 1755 if (size == 1) { 1756 if (M == PETSC_DECIDE) M = m; 1757 if (N == PETSC_DECIDE) N = n; 1758 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 *A = 0; 1763 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 1764 PLogObjectCreate(B); 1765 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1766 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1767 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1768 1769 B->destroy = MatDestroy_MPIBAIJ; 1770 B->view = MatView_MPIBAIJ; 1771 B->mapping = 0; 1772 B->factor = 0; 1773 B->assembled = PETSC_FALSE; 1774 1775 B->insertmode = NOT_SET_VALUES; 1776 MPI_Comm_rank(comm,&b->rank); 1777 MPI_Comm_size(comm,&b->size); 1778 1779 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1780 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1781 } 1782 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1783 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1784 } 1785 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1786 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1787 } 1788 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1789 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1790 1791 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1792 work[0] = m; work[1] = n; 1793 mbs = m/bs; nbs = n/bs; 1794 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1795 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1796 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1797 } 1798 if (m == PETSC_DECIDE) { 1799 Mbs = M/bs; 1800 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1801 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1802 m = mbs*bs; 1803 } 1804 if (n == PETSC_DECIDE) { 1805 Nbs = N/bs; 1806 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1807 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1808 n = nbs*bs; 1809 } 1810 if (mbs*bs != m || nbs*bs != n) { 1811 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1812 } 1813 1814 b->m = m; B->m = m; 1815 b->n = n; B->n = n; 1816 b->N = N; B->N = N; 1817 b->M = M; B->M = M; 1818 b->bs = bs; 1819 b->bs2 = bs*bs; 1820 b->mbs = mbs; 1821 b->nbs = nbs; 1822 b->Mbs = Mbs; 1823 b->Nbs = Nbs; 1824 1825 /* build local table of row and column ownerships */ 1826 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1827 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1828 b->cowners = b->rowners + b->size + 2; 1829 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1830 b->rowners[0] = 0; 1831 for ( i=2; i<=b->size; i++ ) { 1832 b->rowners[i] += b->rowners[i-1]; 1833 } 1834 b->rstart = b->rowners[b->rank]; 1835 b->rend = b->rowners[b->rank+1]; 1836 b->rstart_bs = b->rstart * bs; 1837 b->rend_bs = b->rend * bs; 1838 1839 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1840 b->cowners[0] = 0; 1841 for ( i=2; i<=b->size; i++ ) { 1842 b->cowners[i] += b->cowners[i-1]; 1843 } 1844 b->cstart = b->cowners[b->rank]; 1845 b->cend = b->cowners[b->rank+1]; 1846 b->cstart_bs = b->cstart * bs; 1847 b->cend_bs = b->cend * bs; 1848 1849 1850 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1851 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1852 PLogObjectParent(B,b->A); 1853 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1854 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1855 PLogObjectParent(B,b->B); 1856 1857 /* build cache for off array entries formed */ 1858 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1859 b->donotstash = 0; 1860 b->colmap = 0; 1861 b->garray = 0; 1862 b->roworiented = 1; 1863 1864 /* stuff used in block assembly */ 1865 b->barray = 0; 1866 1867 /* stuff used for matrix vector multiply */ 1868 b->lvec = 0; 1869 b->Mvctx = 0; 1870 1871 /* stuff for MatGetRow() */ 1872 b->rowindices = 0; 1873 b->rowvalues = 0; 1874 b->getrowactive = PETSC_FALSE; 1875 1876 /* hash table stuff */ 1877 b->ht = 0; 1878 b->ht_size = 0; 1879 1880 *A = B; 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #undef __FUNC__ 1885 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 1886 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 1887 { 1888 Mat mat; 1889 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 1890 int ierr, len=0, flg; 1891 1892 PetscFunctionBegin; 1893 *newmat = 0; 1894 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 1895 PLogObjectCreate(mat); 1896 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 1897 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1898 mat->destroy = MatDestroy_MPIBAIJ; 1899 mat->view = MatView_MPIBAIJ; 1900 mat->factor = matin->factor; 1901 mat->assembled = PETSC_TRUE; 1902 1903 a->m = mat->m = oldmat->m; 1904 a->n = mat->n = oldmat->n; 1905 a->M = mat->M = oldmat->M; 1906 a->N = mat->N = oldmat->N; 1907 1908 a->bs = oldmat->bs; 1909 a->bs2 = oldmat->bs2; 1910 a->mbs = oldmat->mbs; 1911 a->nbs = oldmat->nbs; 1912 a->Mbs = oldmat->Mbs; 1913 a->Nbs = oldmat->Nbs; 1914 1915 a->rstart = oldmat->rstart; 1916 a->rend = oldmat->rend; 1917 a->cstart = oldmat->cstart; 1918 a->cend = oldmat->cend; 1919 a->size = oldmat->size; 1920 a->rank = oldmat->rank; 1921 mat->insertmode = NOT_SET_VALUES; 1922 a->rowvalues = 0; 1923 a->getrowactive = PETSC_FALSE; 1924 a->barray = 0; 1925 1926 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1927 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1928 a->cowners = a->rowners + a->size + 2; 1929 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1930 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1931 if (oldmat->colmap) { 1932 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1933 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1934 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 1935 } else a->colmap = 0; 1936 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 1937 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1938 PLogObjectMemory(mat,len*sizeof(int)); 1939 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1940 } else a->garray = 0; 1941 1942 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1943 PLogObjectParent(mat,a->lvec); 1944 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1945 PLogObjectParent(mat,a->Mvctx); 1946 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1947 PLogObjectParent(mat,a->A); 1948 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1949 PLogObjectParent(mat,a->B); 1950 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1951 if (flg) { 1952 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1953 } 1954 *newmat = mat; 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #include "sys.h" 1959 1960 #undef __FUNC__ 1961 #define __FUNC__ "MatLoad_MPIBAIJ" 1962 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 1963 { 1964 Mat A; 1965 int i, nz, ierr, j,rstart, rend, fd; 1966 Scalar *vals,*buf; 1967 MPI_Comm comm = ((PetscObject)viewer)->comm; 1968 MPI_Status status; 1969 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1970 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 1971 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 1972 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1973 int dcount,kmax,k,nzcount,tmp; 1974 1975 PetscFunctionBegin; 1976 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1977 bs2 = bs*bs; 1978 1979 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1980 if (!rank) { 1981 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1982 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1983 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1984 if (header[3] < 0) { 1985 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 1986 } 1987 } 1988 1989 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1990 M = header[1]; N = header[2]; 1991 1992 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1993 1994 /* 1995 This code adds extra rows to make sure the number of rows is 1996 divisible by the blocksize 1997 */ 1998 Mbs = M/bs; 1999 extra_rows = bs - M + bs*(Mbs); 2000 if (extra_rows == bs) extra_rows = 0; 2001 else Mbs++; 2002 if (extra_rows &&!rank) { 2003 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2004 } 2005 2006 /* determine ownership of all rows */ 2007 mbs = Mbs/size + ((Mbs % size) > rank); 2008 m = mbs * bs; 2009 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2010 browners = rowners + size + 1; 2011 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2012 rowners[0] = 0; 2013 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2014 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2015 rstart = rowners[rank]; 2016 rend = rowners[rank+1]; 2017 2018 /* distribute row lengths to all processors */ 2019 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2020 if (!rank) { 2021 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2022 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2023 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2024 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2025 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2026 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2027 PetscFree(sndcounts); 2028 } else { 2029 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2030 } 2031 2032 if (!rank) { 2033 /* calculate the number of nonzeros on each processor */ 2034 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2035 PetscMemzero(procsnz,size*sizeof(int)); 2036 for ( i=0; i<size; i++ ) { 2037 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2038 procsnz[i] += rowlengths[j]; 2039 } 2040 } 2041 PetscFree(rowlengths); 2042 2043 /* determine max buffer needed and allocate it */ 2044 maxnz = 0; 2045 for ( i=0; i<size; i++ ) { 2046 maxnz = PetscMax(maxnz,procsnz[i]); 2047 } 2048 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2049 2050 /* read in my part of the matrix column indices */ 2051 nz = procsnz[0]; 2052 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2053 mycols = ibuf; 2054 if (size == 1) nz -= extra_rows; 2055 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2056 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2057 2058 /* read in every ones (except the last) and ship off */ 2059 for ( i=1; i<size-1; i++ ) { 2060 nz = procsnz[i]; 2061 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2062 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2063 } 2064 /* read in the stuff for the last proc */ 2065 if ( size != 1 ) { 2066 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2067 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2068 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2069 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2070 } 2071 PetscFree(cols); 2072 } else { 2073 /* determine buffer space needed for message */ 2074 nz = 0; 2075 for ( i=0; i<m; i++ ) { 2076 nz += locrowlens[i]; 2077 } 2078 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2079 mycols = ibuf; 2080 /* receive message of column indices*/ 2081 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2082 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2083 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2084 } 2085 2086 /* loop over local rows, determining number of off diagonal entries */ 2087 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2088 odlens = dlens + (rend-rstart); 2089 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2090 PetscMemzero(mask,3*Mbs*sizeof(int)); 2091 masked1 = mask + Mbs; 2092 masked2 = masked1 + Mbs; 2093 rowcount = 0; nzcount = 0; 2094 for ( i=0; i<mbs; i++ ) { 2095 dcount = 0; 2096 odcount = 0; 2097 for ( j=0; j<bs; j++ ) { 2098 kmax = locrowlens[rowcount]; 2099 for ( k=0; k<kmax; k++ ) { 2100 tmp = mycols[nzcount++]/bs; 2101 if (!mask[tmp]) { 2102 mask[tmp] = 1; 2103 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2104 else masked1[dcount++] = tmp; 2105 } 2106 } 2107 rowcount++; 2108 } 2109 2110 dlens[i] = dcount; 2111 odlens[i] = odcount; 2112 2113 /* zero out the mask elements we set */ 2114 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2115 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2116 } 2117 2118 /* create our matrix */ 2119 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2120 CHKERRQ(ierr); 2121 A = *newmat; 2122 MatSetOption(A,MAT_COLUMNS_SORTED); 2123 2124 if (!rank) { 2125 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2126 /* read in my part of the matrix numerical values */ 2127 nz = procsnz[0]; 2128 vals = buf; 2129 mycols = ibuf; 2130 if (size == 1) nz -= extra_rows; 2131 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2132 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2133 2134 /* insert into matrix */ 2135 jj = rstart*bs; 2136 for ( i=0; i<m; i++ ) { 2137 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2138 mycols += locrowlens[i]; 2139 vals += locrowlens[i]; 2140 jj++; 2141 } 2142 /* read in other processors (except the last one) and ship out */ 2143 for ( i=1; i<size-1; i++ ) { 2144 nz = procsnz[i]; 2145 vals = buf; 2146 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2147 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2148 } 2149 /* the last proc */ 2150 if ( size != 1 ){ 2151 nz = procsnz[i] - extra_rows; 2152 vals = buf; 2153 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2154 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2155 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2156 } 2157 PetscFree(procsnz); 2158 } else { 2159 /* receive numeric values */ 2160 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2161 2162 /* receive message of values*/ 2163 vals = buf; 2164 mycols = ibuf; 2165 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2166 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2167 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2168 2169 /* insert into matrix */ 2170 jj = rstart*bs; 2171 for ( i=0; i<m; i++ ) { 2172 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2173 mycols += locrowlens[i]; 2174 vals += locrowlens[i]; 2175 jj++; 2176 } 2177 } 2178 PetscFree(locrowlens); 2179 PetscFree(buf); 2180 PetscFree(ibuf); 2181 PetscFree(rowners); 2182 PetscFree(dlens); 2183 PetscFree(mask); 2184 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2185 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 2190