1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.96 1998/01/12 18:35:34 balay Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 377 378 #undef __FUNC__ 379 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 380 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 381 { 382 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 383 int ierr,i,j,row,col; 384 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 385 int rend_orig=baij->rend_bs; 386 387 int h1,key,size=baij->ht_size,k,bs=baij->bs; 388 int * HT = baij->ht; 389 Scalar ** HD = (Scalar **)(HT + size),value; 390 391 392 PetscFunctionBegin; 393 394 if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 if (im[i] >= rstart_orig && im[i] < rend_orig) { 402 row = im[i]; 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*baij->Nbs+(col/bs)+1; 408 h1 = HASH1(size,key); 409 410 for ( k=0; k<size; k++ ){ 411 if (HT[(h1+k)%size] == key) { 412 if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value; 413 else *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) = value; 414 break; 415 } 416 } 417 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hash table Entry not found"); 418 } 419 } else { 420 if (roworiented && !baij->donotstash) { 421 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 422 } else { 423 if (!baij->donotstash) { 424 row = im[i]; 425 for ( j=0; j<n; j++ ) { 426 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 427 } 428 } 429 } 430 } 431 } 432 PetscFunctionReturn(0); 433 } 434 435 #undef __FUNC__ 436 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 437 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 438 { 439 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 440 int ierr,i,j,ii,jj,row,col,k,l; 441 int roworiented = baij->roworiented,rstart=baij->rstart ; 442 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 443 444 int h1,key,size=baij->ht_size; 445 int * HT = baij->ht; 446 Scalar ** HD = (Scalar **)(HT + size),*value,*baij_a; 447 448 PetscFunctionBegin; 449 450 if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 451 452 if (roworiented) { 453 stepval = (n-1)*bs; 454 } else { 455 stepval = (m-1)*bs; 456 } 457 for ( i=0; i<m; i++ ) { 458 #if defined(USE_PETSC_BOPT_g) 459 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 460 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 461 #endif 462 if (im[i] >= rstart && im[i] < rend) { 463 row = im[i]; 464 for ( j=0; j<n; j++ ) { 465 col = in[j]; 466 467 /* Look up into the Hash Table */ 468 key = row*baij->Nbs+col+1; 469 h1 = HASH1(size,key); 470 471 for ( k=0; k<size; k++ ){ 472 if (HT[(h1+k)%size] == key) { 473 baij_a = HD[(h1+k)%size]; 474 475 if (roworiented) { 476 value = v + i*(stepval+bs)*bs + j*bs; 477 if (addv == ADD_VALUES) { 478 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 479 for ( jj=0; jj<bs2; jj+=bs ) { 480 baij_a[jj] += *value++; 481 } 482 } 483 } else { 484 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 485 for ( jj=0; jj<bs2; jj+=bs ) { 486 baij_a[jj] = *value++; 487 } 488 } 489 } 490 } else { 491 value = v + j*(stepval+bs)*bs + i*bs; 492 if (addv == ADD_VALUES) { 493 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 494 for ( jj=0; jj<bs; jj++ ) { 495 baij_a[jj] += *value++; 496 } 497 } 498 } else { 499 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 500 for ( jj=0; jj<bs; jj++ ) { 501 baij_a[jj] = *value++; 502 } 503 } 504 } 505 } 506 break; 507 } 508 } 509 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 510 } 511 } else { 512 if (!baij->donotstash) { 513 if (roworiented ) { 514 row = im[i]*bs; 515 value = v + i*(stepval+bs)*bs; 516 for ( j=0; j<bs; j++,row++ ) { 517 for ( k=0; k<n; k++ ) { 518 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 519 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 520 } 521 } 522 } 523 } else { 524 for ( j=0; j<n; j++ ) { 525 value = v + j*(stepval+bs)*bs + i*bs; 526 col = in[j]*bs; 527 for ( k=0; k<bs; k++,col++,value+=stepval) { 528 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 529 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 530 } 531 } 532 } 533 } 534 } 535 } 536 } 537 PetscFunctionReturn(0); 538 } 539 #undef __FUNC__ 540 #define __FUNC__ "MatGetValues_MPIBAIJ" 541 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 542 { 543 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 544 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 545 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 546 547 for ( i=0; i<m; i++ ) { 548 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 549 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 550 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 551 row = idxm[i] - bsrstart; 552 for ( j=0; j<n; j++ ) { 553 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 554 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 555 if (idxn[j] >= bscstart && idxn[j] < bscend){ 556 col = idxn[j] - bscstart; 557 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 558 } else { 559 if (!baij->colmap) { 560 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 561 } 562 if((baij->colmap[idxn[j]/bs]-1 < 0) || 563 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 564 else { 565 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 566 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 567 } 568 } 569 } 570 } else { 571 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNC__ 578 #define __FUNC__ "MatNorm_MPIBAIJ" 579 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 582 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 583 int ierr, i,bs2=baij->bs2; 584 double sum = 0.0; 585 Scalar *v; 586 587 PetscFunctionBegin; 588 if (baij->size == 1) { 589 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 590 } else { 591 if (type == NORM_FROBENIUS) { 592 v = amat->a; 593 for (i=0; i<amat->nz*bs2; i++ ) { 594 #if defined(USE_PETSC_COMPLEX) 595 sum += real(conj(*v)*(*v)); v++; 596 #else 597 sum += (*v)*(*v); v++; 598 #endif 599 } 600 v = bmat->a; 601 for (i=0; i<bmat->nz*bs2; i++ ) { 602 #if defined(USE_PETSC_COMPLEX) 603 sum += real(conj(*v)*(*v)); v++; 604 #else 605 sum += (*v)*(*v); v++; 606 #endif 607 } 608 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 609 *norm = sqrt(*norm); 610 } else { 611 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 612 } 613 } 614 PetscFunctionReturn(0); 615 } 616 617 #undef __FUNC__ 618 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 619 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 620 { 621 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 622 MPI_Comm comm = mat->comm; 623 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 624 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 625 MPI_Request *send_waits,*recv_waits; 626 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 627 InsertMode addv; 628 Scalar *rvalues,*svalues; 629 630 PetscFunctionBegin; 631 /* make sure all processors are either in INSERTMODE or ADDMODE */ 632 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 633 if (addv == (ADD_VALUES|INSERT_VALUES)) { 634 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 635 } 636 mat->insertmode = addv; /* in case this processor had no cache */ 637 638 /* first count number of contributors to each processor */ 639 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 640 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 641 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 642 for ( i=0; i<baij->stash.n; i++ ) { 643 idx = baij->stash.idx[i]; 644 for ( j=0; j<size; j++ ) { 645 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 646 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 647 } 648 } 649 } 650 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 651 652 /* inform other processors of number of messages and max length*/ 653 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 654 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 655 nreceives = work[rank]; 656 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 657 nmax = work[rank]; 658 PetscFree(work); 659 660 /* post receives: 661 1) each message will consist of ordered pairs 662 (global index,value) we store the global index as a double 663 to simplify the message passing. 664 2) since we don't know how long each individual message is we 665 allocate the largest needed buffer for each receive. Potentially 666 this is a lot of wasted space. 667 668 669 This could be done better. 670 */ 671 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 672 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 673 for ( i=0; i<nreceives; i++ ) { 674 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 675 } 676 677 /* do sends: 678 1) starts[i] gives the starting index in svalues for stuff going to 679 the ith processor 680 */ 681 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 682 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 683 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 684 starts[0] = 0; 685 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 686 for ( i=0; i<baij->stash.n; i++ ) { 687 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 688 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 689 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 690 } 691 PetscFree(owner); 692 starts[0] = 0; 693 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 694 count = 0; 695 for ( i=0; i<size; i++ ) { 696 if (procs[i]) { 697 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 698 } 699 } 700 PetscFree(starts); PetscFree(nprocs); 701 702 /* Free cache space */ 703 PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 704 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 705 706 baij->svalues = svalues; baij->rvalues = rvalues; 707 baij->nsends = nsends; baij->nrecvs = nreceives; 708 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 709 baij->rmax = nmax; 710 711 PetscFunctionReturn(0); 712 } 713 714 /* 715 Creates the hash table, and sets the table 716 This table is created only once. 717 If new entried need to be added to the matrix 718 then the hash table has to be destroyed and 719 recreated. 720 */ 721 #undef __FUNC__ 722 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 723 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 724 { 725 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 726 Mat A = baij->A, B=baij->B; 727 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 728 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 729 int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 730 int cstart=baij->cstart,*garray=baij->garray,row,col; 731 int *HT,key; 732 Scalar **HD; 733 734 PetscFunctionBegin; 735 baij->ht_size=(int)(factor*nz); 736 size = baij->ht_size; 737 738 if (baij->ht) { 739 PetscFunctionReturn(0); 740 } 741 742 /* Allocate Memory for Hash Table */ 743 baij->ht = (int*)PetscMalloc(size*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->ht); 744 HT = baij->ht; 745 HD = (Scalar**)(HT + size); 746 PetscMemzero(HT,size*sizeof(int)+sizeof(Scalar*)); 747 748 749 /* Loop Over A */ 750 for ( i=0; i<a->mbs; i++ ) { 751 for ( j=ai[i]; j<ai[i+1]; j++ ) { 752 row = i+rstart; 753 col = aj[j]+cstart; 754 755 key = row*baij->Nbs + col + 1; 756 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 757 h1 = HASH1(size,key); 758 759 for ( k=0; k<size; k++ ){ 760 if (HT[(h1+k)%size] == 0.0) { 761 HT[(h1+k)%size] = key; 762 HD[(h1+k)%size] = a->a + j*bs2; 763 break; 764 } else ct++; 765 } 766 if (k> max1) max1 = k; 767 } 768 } 769 /* Loop Over B */ 770 for ( i=0; i<b->mbs; i++ ) { 771 for ( j=bi[i]; j<bi[i+1]; j++ ) { 772 row = i+rstart; 773 col = garray[bj[j]]; 774 key = row*baij->Nbs + col + 1; 775 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 776 h1 = HASH1(size,key); 777 for ( k=0; k<size; k++ ){ 778 if (HT[(h1+k)%size] == 0.0) { 779 HT[(h1+k)%size] = key; 780 HD[(h1+k)%size] = b->a + j*bs2; 781 break; 782 } else ct++; 783 } 784 if (k> max2) max2 = k; 785 } 786 } 787 788 /* Print Summary */ 789 for ( i=0,key=0.0,j=0; i<size; i++) 790 if (HT[i]) {j++;} 791 792 /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 793 PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 794 fflush(stdout); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 800 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 803 MPI_Status *send_status,recv_status; 804 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 805 int bs=baij->bs,row,col,other_disassembled,flg; 806 Scalar *values,val; 807 InsertMode addv = mat->insertmode; 808 809 PetscFunctionBegin; 810 /* wait on receives */ 811 while (count) { 812 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 813 /* unpack receives into our local space */ 814 values = baij->rvalues + 3*imdex*baij->rmax; 815 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 816 n = n/3; 817 for ( i=0; i<n; i++ ) { 818 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 819 col = (int) PetscReal(values[3*i+1]); 820 val = values[3*i+2]; 821 if (col >= baij->cstart*bs && col < baij->cend*bs) { 822 col -= baij->cstart*bs; 823 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 824 } else { 825 if (mat->was_assembled) { 826 if (!baij->colmap) { 827 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 828 } 829 col = (baij->colmap[col/bs]) - 1 + col%bs; 830 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 831 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 832 col = (int) PetscReal(values[3*i+1]); 833 } 834 } 835 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 836 } 837 } 838 count--; 839 } 840 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 841 842 /* wait on sends */ 843 if (baij->nsends) { 844 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 845 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 846 PetscFree(send_status); 847 } 848 PetscFree(baij->send_waits); PetscFree(baij->svalues); 849 850 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 851 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 852 853 /* determine if any processor has disassembled, if so we must 854 also disassemble ourselfs, in order that we may reassemble. */ 855 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 856 if (mat->was_assembled && !other_disassembled) { 857 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 858 } 859 860 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 861 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 862 } 863 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 865 866 ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr); 867 if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) { 868 double fact = 1.39; 869 ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr); 870 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr); 871 mat->ops.setvalues = MatSetValues_MPIBAIJ_HT; 872 mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 873 } 874 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNC__ 879 #define __FUNC__ "MatView_MPIBAIJ_Binary" 880 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 881 { 882 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 883 int ierr; 884 885 PetscFunctionBegin; 886 if (baij->size == 1) { 887 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 888 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNC__ 893 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 894 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 895 { 896 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 897 int ierr, format,rank,bs = baij->bs; 898 FILE *fd; 899 ViewerType vtype; 900 901 PetscFunctionBegin; 902 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 903 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 904 ierr = ViewerGetFormat(viewer,&format); 905 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 906 MatInfo info; 907 MPI_Comm_rank(mat->comm,&rank); 908 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 909 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 910 PetscSequentialPhaseBegin(mat->comm,1); 911 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 912 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 913 baij->bs,(int)info.memory); 914 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 915 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 916 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 917 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 918 fflush(fd); 919 PetscSequentialPhaseEnd(mat->comm,1); 920 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 923 PetscPrintf(mat->comm," block size is %d\n",bs); 924 PetscFunctionReturn(0); 925 } 926 } 927 928 if (vtype == DRAW_VIEWER) { 929 Draw draw; 930 PetscTruth isnull; 931 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 932 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 933 } 934 935 if (vtype == ASCII_FILE_VIEWER) { 936 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 937 PetscSequentialPhaseBegin(mat->comm,1); 938 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 939 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 940 baij->cstart*bs,baij->cend*bs); 941 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 942 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 943 fflush(fd); 944 PetscSequentialPhaseEnd(mat->comm,1); 945 } else { 946 int size = baij->size; 947 rank = baij->rank; 948 if (size == 1) { 949 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 950 } else { 951 /* assemble the entire matrix onto first processor. */ 952 Mat A; 953 Mat_SeqBAIJ *Aloc; 954 int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 955 int mbs=baij->mbs; 956 Scalar *a; 957 958 if (!rank) { 959 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 960 } else { 961 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 962 } 963 PLogObjectParent(mat,A); 964 965 /* copy over the A part */ 966 Aloc = (Mat_SeqBAIJ*) baij->A->data; 967 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = baij->rstart; 969 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 970 971 for ( i=0; i<mbs; i++ ) { 972 rvals[0] = bs*(baij->rstart + i); 973 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 974 for ( j=ai[i]; j<ai[i+1]; j++ ) { 975 col = (baij->cstart+aj[j])*bs; 976 for (k=0; k<bs; k++ ) { 977 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 978 col++; a += bs; 979 } 980 } 981 } 982 /* copy over the B part */ 983 Aloc = (Mat_SeqBAIJ*) baij->B->data; 984 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 985 row = baij->rstart*bs; 986 for ( i=0; i<mbs; i++ ) { 987 rvals[0] = bs*(baij->rstart + i); 988 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 989 for ( j=ai[i]; j<ai[i+1]; j++ ) { 990 col = baij->garray[aj[j]]*bs; 991 for (k=0; k<bs; k++ ) { 992 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 993 col++; a += bs; 994 } 995 } 996 } 997 PetscFree(rvals); 998 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 999 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1000 /* 1001 Everyone has to call to draw the matrix since the graphics waits are 1002 synchronized across all processors that share the Draw object 1003 */ 1004 if (!rank || vtype == DRAW_VIEWER) { 1005 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1006 } 1007 ierr = MatDestroy(A); CHKERRQ(ierr); 1008 } 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 1014 1015 #undef __FUNC__ 1016 #define __FUNC__ "MatView_MPIBAIJ" 1017 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 1018 { 1019 Mat mat = (Mat) obj; 1020 int ierr; 1021 ViewerType vtype; 1022 1023 PetscFunctionBegin; 1024 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1025 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1026 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1027 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1028 } else if (vtype == BINARY_FILE_VIEWER) { 1029 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1030 } 1031 PetscFunctionReturn(0); 1032 } 1033 1034 #undef __FUNC__ 1035 #define __FUNC__ "MatDestroy_MPIBAIJ" 1036 int MatDestroy_MPIBAIJ(PetscObject obj) 1037 { 1038 Mat mat = (Mat) obj; 1039 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1040 int ierr; 1041 1042 PetscFunctionBegin; 1043 #if defined(USE_PETSC_LOG) 1044 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 1045 #endif 1046 1047 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1048 PetscFree(baij->rowners); 1049 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1050 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1051 if (baij->colmap) PetscFree(baij->colmap); 1052 if (baij->garray) PetscFree(baij->garray); 1053 if (baij->lvec) VecDestroy(baij->lvec); 1054 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1055 if (baij->rowvalues) PetscFree(baij->rowvalues); 1056 if (baij->barray) PetscFree(baij->barray); 1057 if (baij->ht) PetscFree(baij->ht); 1058 PetscFree(baij); 1059 PLogObjectDestroy(mat); 1060 PetscHeaderDestroy(mat); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNC__ 1065 #define __FUNC__ "MatMult_MPIBAIJ" 1066 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1067 { 1068 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1069 int ierr, nt; 1070 1071 PetscFunctionBegin; 1072 VecGetLocalSize_Fast(xx,nt); 1073 if (nt != a->n) { 1074 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1075 } 1076 VecGetLocalSize_Fast(yy,nt); 1077 if (nt != a->m) { 1078 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1079 } 1080 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1081 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 1082 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1083 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1084 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNC__ 1089 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1090 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1091 { 1092 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1093 int ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1097 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1098 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1099 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNC__ 1104 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1105 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1106 { 1107 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1108 int ierr; 1109 1110 PetscFunctionBegin; 1111 /* do nondiagonal part */ 1112 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1113 /* send it on its way */ 1114 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1115 /* do local part */ 1116 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1117 /* receive remote parts: note this assumes the values are not actually */ 1118 /* inserted in yy until the next line, which is true for my implementation*/ 1119 /* but is not perhaps always true. */ 1120 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNC__ 1125 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1126 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1127 { 1128 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1129 int ierr; 1130 1131 PetscFunctionBegin; 1132 /* do nondiagonal part */ 1133 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1134 /* send it on its way */ 1135 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1136 /* do local part */ 1137 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1138 /* receive remote parts: note this assumes the values are not actually */ 1139 /* inserted in yy until the next line, which is true for my implementation*/ 1140 /* but is not perhaps always true. */ 1141 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 /* 1146 This only works correctly for square matrices where the subblock A->A is the 1147 diagonal block 1148 */ 1149 #undef __FUNC__ 1150 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1151 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1152 { 1153 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1154 int ierr; 1155 1156 PetscFunctionBegin; 1157 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1158 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNC__ 1163 #define __FUNC__ "MatScale_MPIBAIJ" 1164 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1165 { 1166 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1167 int ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1171 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNC__ 1176 #define __FUNC__ "MatGetSize_MPIBAIJ" 1177 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1178 { 1179 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1180 1181 PetscFunctionBegin; 1182 if (m) *m = mat->M; 1183 if (n) *n = mat->N; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNC__ 1188 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1189 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1190 { 1191 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1192 1193 PetscFunctionBegin; 1194 *m = mat->m; *n = mat->N; 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNC__ 1199 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1200 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1201 { 1202 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1203 1204 PetscFunctionBegin; 1205 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1206 PetscFunctionReturn(0); 1207 } 1208 1209 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1210 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1211 1212 #undef __FUNC__ 1213 #define __FUNC__ "MatGetRow_MPIBAIJ" 1214 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1215 { 1216 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1217 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1218 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1219 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1220 int *cmap, *idx_p,cstart = mat->cstart; 1221 1222 PetscFunctionBegin; 1223 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1224 mat->getrowactive = PETSC_TRUE; 1225 1226 if (!mat->rowvalues && (idx || v)) { 1227 /* 1228 allocate enough space to hold information from the longest row. 1229 */ 1230 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1231 int max = 1,mbs = mat->mbs,tmp; 1232 for ( i=0; i<mbs; i++ ) { 1233 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1234 if (max < tmp) { max = tmp; } 1235 } 1236 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1237 CHKPTRQ(mat->rowvalues); 1238 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1239 } 1240 1241 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1242 lrow = row - brstart; 1243 1244 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1245 if (!v) {pvA = 0; pvB = 0;} 1246 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1247 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1248 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1249 nztot = nzA + nzB; 1250 1251 cmap = mat->garray; 1252 if (v || idx) { 1253 if (nztot) { 1254 /* Sort by increasing column numbers, assuming A and B already sorted */ 1255 int imark = -1; 1256 if (v) { 1257 *v = v_p = mat->rowvalues; 1258 for ( i=0; i<nzB; i++ ) { 1259 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1260 else break; 1261 } 1262 imark = i; 1263 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1264 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1265 } 1266 if (idx) { 1267 *idx = idx_p = mat->rowindices; 1268 if (imark > -1) { 1269 for ( i=0; i<imark; i++ ) { 1270 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1271 } 1272 } else { 1273 for ( i=0; i<nzB; i++ ) { 1274 if (cmap[cworkB[i]/bs] < cstart) 1275 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1276 else break; 1277 } 1278 imark = i; 1279 } 1280 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1281 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1282 } 1283 } else { 1284 if (idx) *idx = 0; 1285 if (v) *v = 0; 1286 } 1287 } 1288 *nz = nztot; 1289 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1290 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNC__ 1295 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1296 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1297 { 1298 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1299 1300 PetscFunctionBegin; 1301 if (baij->getrowactive == PETSC_FALSE) { 1302 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1303 } 1304 baij->getrowactive = PETSC_FALSE; 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNC__ 1309 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1310 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1311 { 1312 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1313 1314 PetscFunctionBegin; 1315 *bs = baij->bs; 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNC__ 1320 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1321 int MatZeroEntries_MPIBAIJ(Mat A) 1322 { 1323 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1324 int ierr; 1325 1326 PetscFunctionBegin; 1327 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1328 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNC__ 1333 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1334 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1335 { 1336 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1337 Mat A = a->A, B = a->B; 1338 int ierr; 1339 double isend[5], irecv[5]; 1340 1341 PetscFunctionBegin; 1342 info->block_size = (double)a->bs; 1343 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1344 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1345 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1346 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1347 if (flag == MAT_LOCAL) { 1348 info->nz_used = isend[0]; 1349 info->nz_allocated = isend[1]; 1350 info->nz_unneeded = isend[2]; 1351 info->memory = isend[3]; 1352 info->mallocs = isend[4]; 1353 } else if (flag == MAT_GLOBAL_MAX) { 1354 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1355 info->nz_used = irecv[0]; 1356 info->nz_allocated = irecv[1]; 1357 info->nz_unneeded = irecv[2]; 1358 info->memory = irecv[3]; 1359 info->mallocs = irecv[4]; 1360 } else if (flag == MAT_GLOBAL_SUM) { 1361 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1362 info->nz_used = irecv[0]; 1363 info->nz_allocated = irecv[1]; 1364 info->nz_unneeded = irecv[2]; 1365 info->memory = irecv[3]; 1366 info->mallocs = irecv[4]; 1367 } 1368 info->rows_global = (double)a->M; 1369 info->columns_global = (double)a->N; 1370 info->rows_local = (double)a->m; 1371 info->columns_local = (double)a->N; 1372 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1373 info->fill_ratio_needed = 0; 1374 info->factor_mallocs = 0; 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNC__ 1379 #define __FUNC__ "MatSetOption_MPIBAIJ" 1380 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1381 { 1382 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1383 1384 PetscFunctionBegin; 1385 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1386 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1387 op == MAT_COLUMNS_UNSORTED || 1388 op == MAT_COLUMNS_SORTED || 1389 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1390 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1391 MatSetOption(a->A,op); 1392 MatSetOption(a->B,op); 1393 } else if (op == MAT_ROW_ORIENTED) { 1394 a->roworiented = 1; 1395 MatSetOption(a->A,op); 1396 MatSetOption(a->B,op); 1397 } else if (op == MAT_ROWS_SORTED || 1398 op == MAT_ROWS_UNSORTED || 1399 op == MAT_SYMMETRIC || 1400 op == MAT_STRUCTURALLY_SYMMETRIC || 1401 op == MAT_YES_NEW_DIAGONALS) 1402 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1403 else if (op == MAT_COLUMN_ORIENTED) { 1404 a->roworiented = 0; 1405 MatSetOption(a->A,op); 1406 MatSetOption(a->B,op); 1407 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1408 a->donotstash = 1; 1409 } else if (op == MAT_NO_NEW_DIAGONALS) { 1410 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1411 } else { 1412 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1413 } 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNC__ 1418 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1419 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1420 { 1421 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1422 Mat_SeqBAIJ *Aloc; 1423 Mat B; 1424 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1425 int bs=baij->bs,mbs=baij->mbs; 1426 Scalar *a; 1427 1428 PetscFunctionBegin; 1429 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1430 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1431 CHKERRQ(ierr); 1432 1433 /* copy over the A part */ 1434 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1435 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1436 row = baij->rstart; 1437 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1438 1439 for ( i=0; i<mbs; i++ ) { 1440 rvals[0] = bs*(baij->rstart + i); 1441 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1442 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1443 col = (baij->cstart+aj[j])*bs; 1444 for (k=0; k<bs; k++ ) { 1445 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1446 col++; a += bs; 1447 } 1448 } 1449 } 1450 /* copy over the B part */ 1451 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1452 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1453 row = baij->rstart*bs; 1454 for ( i=0; i<mbs; i++ ) { 1455 rvals[0] = bs*(baij->rstart + i); 1456 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1457 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1458 col = baij->garray[aj[j]]*bs; 1459 for (k=0; k<bs; k++ ) { 1460 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1461 col++; a += bs; 1462 } 1463 } 1464 } 1465 PetscFree(rvals); 1466 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1467 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1468 1469 if (matout != PETSC_NULL) { 1470 *matout = B; 1471 } else { 1472 /* This isn't really an in-place transpose .... but free data structures from baij */ 1473 PetscFree(baij->rowners); 1474 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1475 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1476 if (baij->colmap) PetscFree(baij->colmap); 1477 if (baij->garray) PetscFree(baij->garray); 1478 if (baij->lvec) VecDestroy(baij->lvec); 1479 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1480 PetscFree(baij); 1481 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1482 PetscHeaderDestroy(B); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNC__ 1488 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1489 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1490 { 1491 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1492 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1493 int ierr,s1,s2,s3; 1494 1495 PetscFunctionBegin; 1496 if (ll) { 1497 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1498 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1499 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1500 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1501 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1502 } 1503 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNC__ 1508 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1509 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1510 { 1511 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1512 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1513 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1514 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1515 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1516 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1517 MPI_Comm comm = A->comm; 1518 MPI_Request *send_waits,*recv_waits; 1519 MPI_Status recv_status,*send_status; 1520 IS istmp; 1521 1522 PetscFunctionBegin; 1523 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1524 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1525 1526 /* first count number of contributors to each processor */ 1527 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1528 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1529 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1530 for ( i=0; i<N; i++ ) { 1531 idx = rows[i]; 1532 found = 0; 1533 for ( j=0; j<size; j++ ) { 1534 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1535 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1536 } 1537 } 1538 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1539 } 1540 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1541 1542 /* inform other processors of number of messages and max length*/ 1543 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1544 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1545 nrecvs = work[rank]; 1546 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1547 nmax = work[rank]; 1548 PetscFree(work); 1549 1550 /* post receives: */ 1551 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1552 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1553 for ( i=0; i<nrecvs; i++ ) { 1554 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1555 } 1556 1557 /* do sends: 1558 1) starts[i] gives the starting index in svalues for stuff going to 1559 the ith processor 1560 */ 1561 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1562 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1563 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1564 starts[0] = 0; 1565 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1566 for ( i=0; i<N; i++ ) { 1567 svalues[starts[owner[i]]++] = rows[i]; 1568 } 1569 ISRestoreIndices(is,&rows); 1570 1571 starts[0] = 0; 1572 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1573 count = 0; 1574 for ( i=0; i<size; i++ ) { 1575 if (procs[i]) { 1576 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1577 } 1578 } 1579 PetscFree(starts); 1580 1581 base = owners[rank]*bs; 1582 1583 /* wait on receives */ 1584 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1585 source = lens + nrecvs; 1586 count = nrecvs; slen = 0; 1587 while (count) { 1588 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1589 /* unpack receives into our local space */ 1590 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1591 source[imdex] = recv_status.MPI_SOURCE; 1592 lens[imdex] = n; 1593 slen += n; 1594 count--; 1595 } 1596 PetscFree(recv_waits); 1597 1598 /* move the data into the send scatter */ 1599 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1600 count = 0; 1601 for ( i=0; i<nrecvs; i++ ) { 1602 values = rvalues + i*nmax; 1603 for ( j=0; j<lens[i]; j++ ) { 1604 lrows[count++] = values[j] - base; 1605 } 1606 } 1607 PetscFree(rvalues); PetscFree(lens); 1608 PetscFree(owner); PetscFree(nprocs); 1609 1610 /* actually zap the local rows */ 1611 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1612 PLogObjectParent(A,istmp); 1613 1614 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1615 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1616 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1617 1618 if (diag) { 1619 for ( i = 0; i < slen; i++ ) { 1620 row = lrows[i] + rstart_bs; 1621 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1622 } 1623 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1624 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1625 } 1626 PetscFree(lrows); 1627 1628 /* wait on sends */ 1629 if (nsends) { 1630 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1631 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1632 PetscFree(send_status); 1633 } 1634 PetscFree(send_waits); PetscFree(svalues); 1635 1636 PetscFunctionReturn(0); 1637 } 1638 extern int MatPrintHelp_SeqBAIJ(Mat); 1639 #undef __FUNC__ 1640 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1641 int MatPrintHelp_MPIBAIJ(Mat A) 1642 { 1643 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1644 int ierr; 1645 1646 PetscFunctionBegin; 1647 if (!a->rank) { 1648 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1649 } 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNC__ 1654 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1655 int MatSetUnfactored_MPIBAIJ(Mat A) 1656 { 1657 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1658 int ierr; 1659 1660 PetscFunctionBegin; 1661 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 1665 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int); 1666 1667 /* -------------------------------------------------------------------*/ 1668 static struct _MatOps MatOps = { 1669 MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 1670 MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 1671 0,0,0,0, 1672 0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ, 1673 0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ, 1674 MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 1675 MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0, 1676 0,0,0,MatGetSize_MPIBAIJ, 1677 MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 1678 0,0,MatConvertSameType_MPIBAIJ,0,0, 1679 0,0,0,MatGetSubMatrices_MPIBAIJ, 1680 MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ, 1681 MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ, 1682 0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ}; 1683 1684 1685 #undef __FUNC__ 1686 #define __FUNC__ "MatCreateMPIBAIJ" 1687 /*@C 1688 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1689 (block compressed row). For good matrix assembly performance 1690 the user should preallocate the matrix storage by setting the parameters 1691 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1692 performance can be increased by more than a factor of 50. 1693 1694 Input Parameters: 1695 . comm - MPI communicator 1696 . bs - size of blockk 1697 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1698 This value should be the same as the local size used in creating the 1699 y vector for the matrix-vector product y = Ax. 1700 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1701 This value should be the same as the local size used in creating the 1702 x vector for the matrix-vector product y = Ax. 1703 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1704 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1705 . d_nz - number of block nonzeros per block row in diagonal portion of local 1706 submatrix (same for all local rows) 1707 . d_nzz - array containing the number of block nonzeros in the various block rows 1708 of the in diagonal portion of the local (possibly different for each block 1709 row) or PETSC_NULL. You must leave room for the diagonal entry even if 1710 it is zero. 1711 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1712 submatrix (same for all local rows). 1713 . o_nzz - array containing the number of nonzeros in the various block rows of the 1714 off-diagonal portion of the local submatrix (possibly different for 1715 each block row) or PETSC_NULL. 1716 1717 Output Parameter: 1718 . A - the matrix 1719 1720 Notes: 1721 The user MUST specify either the local or global matrix dimensions 1722 (possibly both). 1723 1724 Storage Information: 1725 For a square global matrix we define each processor's diagonal portion 1726 to be its local rows and the corresponding columns (a square submatrix); 1727 each processor's off-diagonal portion encompasses the remainder of the 1728 local matrix (a rectangular submatrix). 1729 1730 The user can specify preallocated storage for the diagonal part of 1731 the local submatrix with either d_nz or d_nnz (not both). Set 1732 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1733 memory allocation. Likewise, specify preallocated storage for the 1734 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1735 1736 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1737 the figure below we depict these three local rows and all columns (0-11). 1738 1739 $ 0 1 2 3 4 5 6 7 8 9 10 11 1740 $ ------------------- 1741 $ row 3 | o o o d d d o o o o o o 1742 $ row 4 | o o o d d d o o o o o o 1743 $ row 5 | o o o d d d o o o o o o 1744 $ ------------------- 1745 $ 1746 1747 Thus, any entries in the d locations are stored in the d (diagonal) 1748 submatrix, and any entries in the o locations are stored in the 1749 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1750 stored simply in the MATSEQBAIJ format for compressed row storage. 1751 1752 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1753 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1754 In general, for PDE problems in which most nonzeros are near the diagonal, 1755 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1756 or you will get TERRIBLE performance; see the users' manual chapter on 1757 matrices. 1758 1759 .keywords: matrix, block, aij, compressed row, sparse, parallel 1760 1761 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 1762 @*/ 1763 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1764 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1765 { 1766 Mat B; 1767 Mat_MPIBAIJ *b; 1768 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1769 1770 PetscFunctionBegin; 1771 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1772 1773 MPI_Comm_size(comm,&size); 1774 if (size == 1) { 1775 if (M == PETSC_DECIDE) M = m; 1776 if (N == PETSC_DECIDE) N = n; 1777 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 1781 *A = 0; 1782 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView); 1783 PLogObjectCreate(B); 1784 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 1785 PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 1786 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1787 1788 B->destroy = MatDestroy_MPIBAIJ; 1789 B->view = MatView_MPIBAIJ; 1790 B->mapping = 0; 1791 B->factor = 0; 1792 B->assembled = PETSC_FALSE; 1793 1794 B->insertmode = NOT_SET_VALUES; 1795 MPI_Comm_rank(comm,&b->rank); 1796 MPI_Comm_size(comm,&b->size); 1797 1798 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1799 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1800 } 1801 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1802 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1803 } 1804 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1805 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1806 } 1807 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1808 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1809 1810 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1811 work[0] = m; work[1] = n; 1812 mbs = m/bs; nbs = n/bs; 1813 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1814 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 1815 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 1816 } 1817 if (m == PETSC_DECIDE) { 1818 Mbs = M/bs; 1819 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1820 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1821 m = mbs*bs; 1822 } 1823 if (n == PETSC_DECIDE) { 1824 Nbs = N/bs; 1825 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 1826 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 1827 n = nbs*bs; 1828 } 1829 if (mbs*bs != m || nbs*bs != n) { 1830 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 1831 } 1832 1833 b->m = m; B->m = m; 1834 b->n = n; B->n = n; 1835 b->N = N; B->N = N; 1836 b->M = M; B->M = M; 1837 b->bs = bs; 1838 b->bs2 = bs*bs; 1839 b->mbs = mbs; 1840 b->nbs = nbs; 1841 b->Mbs = Mbs; 1842 b->Nbs = Nbs; 1843 1844 /* build local table of row and column ownerships */ 1845 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1846 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1847 b->cowners = b->rowners + b->size + 2; 1848 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1849 b->rowners[0] = 0; 1850 for ( i=2; i<=b->size; i++ ) { 1851 b->rowners[i] += b->rowners[i-1]; 1852 } 1853 b->rstart = b->rowners[b->rank]; 1854 b->rend = b->rowners[b->rank+1]; 1855 b->rstart_bs = b->rstart * bs; 1856 b->rend_bs = b->rend * bs; 1857 1858 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1859 b->cowners[0] = 0; 1860 for ( i=2; i<=b->size; i++ ) { 1861 b->cowners[i] += b->cowners[i-1]; 1862 } 1863 b->cstart = b->cowners[b->rank]; 1864 b->cend = b->cowners[b->rank+1]; 1865 b->cstart_bs = b->cstart * bs; 1866 b->cend_bs = b->cend * bs; 1867 1868 1869 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1870 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1871 PLogObjectParent(B,b->A); 1872 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1873 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1874 PLogObjectParent(B,b->B); 1875 1876 /* build cache for off array entries formed */ 1877 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1878 b->donotstash = 0; 1879 b->colmap = 0; 1880 b->garray = 0; 1881 b->roworiented = 1; 1882 1883 /* stuff used in block assembly */ 1884 b->barray = 0; 1885 1886 /* stuff used for matrix vector multiply */ 1887 b->lvec = 0; 1888 b->Mvctx = 0; 1889 1890 /* stuff for MatGetRow() */ 1891 b->rowindices = 0; 1892 b->rowvalues = 0; 1893 b->getrowactive = PETSC_FALSE; 1894 1895 /* hash table stuff */ 1896 b->ht = 0; 1897 b->ht_size = 0; 1898 1899 *A = B; 1900 PetscFunctionReturn(0); 1901 } 1902 1903 #undef __FUNC__ 1904 #define __FUNC__ "MatConvertSameType_MPIBAIJ" 1905 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues) 1906 { 1907 Mat mat; 1908 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 1909 int ierr, len=0, flg; 1910 1911 PetscFunctionBegin; 1912 *newmat = 0; 1913 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView); 1914 PLogObjectCreate(mat); 1915 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a); 1916 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1917 mat->destroy = MatDestroy_MPIBAIJ; 1918 mat->view = MatView_MPIBAIJ; 1919 mat->factor = matin->factor; 1920 mat->assembled = PETSC_TRUE; 1921 1922 a->m = mat->m = oldmat->m; 1923 a->n = mat->n = oldmat->n; 1924 a->M = mat->M = oldmat->M; 1925 a->N = mat->N = oldmat->N; 1926 1927 a->bs = oldmat->bs; 1928 a->bs2 = oldmat->bs2; 1929 a->mbs = oldmat->mbs; 1930 a->nbs = oldmat->nbs; 1931 a->Mbs = oldmat->Mbs; 1932 a->Nbs = oldmat->Nbs; 1933 1934 a->rstart = oldmat->rstart; 1935 a->rend = oldmat->rend; 1936 a->cstart = oldmat->cstart; 1937 a->cend = oldmat->cend; 1938 a->size = oldmat->size; 1939 a->rank = oldmat->rank; 1940 mat->insertmode = NOT_SET_VALUES; 1941 a->rowvalues = 0; 1942 a->getrowactive = PETSC_FALSE; 1943 a->barray = 0; 1944 1945 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1946 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1947 a->cowners = a->rowners + a->size + 2; 1948 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1949 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1950 if (oldmat->colmap) { 1951 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1952 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1953 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 1954 } else a->colmap = 0; 1955 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 1956 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1957 PLogObjectMemory(mat,len*sizeof(int)); 1958 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1959 } else a->garray = 0; 1960 1961 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1962 PLogObjectParent(mat,a->lvec); 1963 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1964 PLogObjectParent(mat,a->Mvctx); 1965 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1966 PLogObjectParent(mat,a->A); 1967 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1968 PLogObjectParent(mat,a->B); 1969 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1970 if (flg) { 1971 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1972 } 1973 *newmat = mat; 1974 PetscFunctionReturn(0); 1975 } 1976 1977 #include "sys.h" 1978 1979 #undef __FUNC__ 1980 #define __FUNC__ "MatLoad_MPIBAIJ" 1981 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 1982 { 1983 Mat A; 1984 int i, nz, ierr, j,rstart, rend, fd; 1985 Scalar *vals,*buf; 1986 MPI_Comm comm = ((PetscObject)viewer)->comm; 1987 MPI_Status status; 1988 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1989 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 1990 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 1991 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1992 int dcount,kmax,k,nzcount,tmp; 1993 1994 PetscFunctionBegin; 1995 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1996 bs2 = bs*bs; 1997 1998 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1999 if (!rank) { 2000 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2001 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2002 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2003 if (header[3] < 0) { 2004 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2005 } 2006 } 2007 2008 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2009 M = header[1]; N = header[2]; 2010 2011 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2012 2013 /* 2014 This code adds extra rows to make sure the number of rows is 2015 divisible by the blocksize 2016 */ 2017 Mbs = M/bs; 2018 extra_rows = bs - M + bs*(Mbs); 2019 if (extra_rows == bs) extra_rows = 0; 2020 else Mbs++; 2021 if (extra_rows &&!rank) { 2022 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2023 } 2024 2025 /* determine ownership of all rows */ 2026 mbs = Mbs/size + ((Mbs % size) > rank); 2027 m = mbs * bs; 2028 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2029 browners = rowners + size + 1; 2030 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2031 rowners[0] = 0; 2032 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2033 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2034 rstart = rowners[rank]; 2035 rend = rowners[rank+1]; 2036 2037 /* distribute row lengths to all processors */ 2038 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2039 if (!rank) { 2040 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2041 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2042 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2043 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2044 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2045 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2046 PetscFree(sndcounts); 2047 } else { 2048 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2049 } 2050 2051 if (!rank) { 2052 /* calculate the number of nonzeros on each processor */ 2053 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2054 PetscMemzero(procsnz,size*sizeof(int)); 2055 for ( i=0; i<size; i++ ) { 2056 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2057 procsnz[i] += rowlengths[j]; 2058 } 2059 } 2060 PetscFree(rowlengths); 2061 2062 /* determine max buffer needed and allocate it */ 2063 maxnz = 0; 2064 for ( i=0; i<size; i++ ) { 2065 maxnz = PetscMax(maxnz,procsnz[i]); 2066 } 2067 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2068 2069 /* read in my part of the matrix column indices */ 2070 nz = procsnz[0]; 2071 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2072 mycols = ibuf; 2073 if (size == 1) nz -= extra_rows; 2074 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2075 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2076 2077 /* read in every ones (except the last) and ship off */ 2078 for ( i=1; i<size-1; i++ ) { 2079 nz = procsnz[i]; 2080 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2081 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2082 } 2083 /* read in the stuff for the last proc */ 2084 if ( size != 1 ) { 2085 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2086 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2087 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2088 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2089 } 2090 PetscFree(cols); 2091 } else { 2092 /* determine buffer space needed for message */ 2093 nz = 0; 2094 for ( i=0; i<m; i++ ) { 2095 nz += locrowlens[i]; 2096 } 2097 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2098 mycols = ibuf; 2099 /* receive message of column indices*/ 2100 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2101 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2102 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2103 } 2104 2105 /* loop over local rows, determining number of off diagonal entries */ 2106 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2107 odlens = dlens + (rend-rstart); 2108 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2109 PetscMemzero(mask,3*Mbs*sizeof(int)); 2110 masked1 = mask + Mbs; 2111 masked2 = masked1 + Mbs; 2112 rowcount = 0; nzcount = 0; 2113 for ( i=0; i<mbs; i++ ) { 2114 dcount = 0; 2115 odcount = 0; 2116 for ( j=0; j<bs; j++ ) { 2117 kmax = locrowlens[rowcount]; 2118 for ( k=0; k<kmax; k++ ) { 2119 tmp = mycols[nzcount++]/bs; 2120 if (!mask[tmp]) { 2121 mask[tmp] = 1; 2122 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2123 else masked1[dcount++] = tmp; 2124 } 2125 } 2126 rowcount++; 2127 } 2128 2129 dlens[i] = dcount; 2130 odlens[i] = odcount; 2131 2132 /* zero out the mask elements we set */ 2133 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2134 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2135 } 2136 2137 /* create our matrix */ 2138 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2139 CHKERRQ(ierr); 2140 A = *newmat; 2141 MatSetOption(A,MAT_COLUMNS_SORTED); 2142 2143 if (!rank) { 2144 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2145 /* read in my part of the matrix numerical values */ 2146 nz = procsnz[0]; 2147 vals = buf; 2148 mycols = ibuf; 2149 if (size == 1) nz -= extra_rows; 2150 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2151 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2152 2153 /* insert into matrix */ 2154 jj = rstart*bs; 2155 for ( i=0; i<m; i++ ) { 2156 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2157 mycols += locrowlens[i]; 2158 vals += locrowlens[i]; 2159 jj++; 2160 } 2161 /* read in other processors (except the last one) and ship out */ 2162 for ( i=1; i<size-1; i++ ) { 2163 nz = procsnz[i]; 2164 vals = buf; 2165 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2166 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2167 } 2168 /* the last proc */ 2169 if ( size != 1 ){ 2170 nz = procsnz[i] - extra_rows; 2171 vals = buf; 2172 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2173 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2174 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2175 } 2176 PetscFree(procsnz); 2177 } else { 2178 /* receive numeric values */ 2179 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2180 2181 /* receive message of values*/ 2182 vals = buf; 2183 mycols = ibuf; 2184 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2185 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2186 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2187 2188 /* insert into matrix */ 2189 jj = rstart*bs; 2190 for ( i=0; i<m; i++ ) { 2191 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2192 mycols += locrowlens[i]; 2193 vals += locrowlens[i]; 2194 jj++; 2195 } 2196 } 2197 PetscFree(locrowlens); 2198 PetscFree(buf); 2199 PetscFree(ibuf); 2200 PetscFree(rowners); 2201 PetscFree(dlens); 2202 PetscFree(mask); 2203 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2204 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 2209