#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.167 1999/04/19 22:12:56 bsmith Exp balay $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
#include "src/vec/vecimpl.h"

/* Kernels on the sequential (per-process) BAIJ blocks and MPI helpers,
   implemented in other compilation units of the BAIJ implementation. */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   MatStoreValues_MPIBAIJ - Saves a copy of the current matrix values by
   delegating MatStoreValues() to both local sequential blocks: the
   diagonal part (aij->A) and the off-diagonal part (aij->B).
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   MatRetrieveValues_MPIBAIJ - Restores the values previously saved with
   MatStoreValues_MPIBAIJ() into both local sequential blocks.
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* 
     Local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non-scalable way since the 
   length of colmap equals the global matrix length. 
54 */ 55 #undef __FUNC__ 56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 57 static int CreateColmap_MPIBAIJ_Private(Mat mat) 58 { 59 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 61 int nbs = B->nbs,i,bs=B->bs; 62 #if defined (USE_CTABLE) 63 int ierr; 64 #endif 65 66 PetscFunctionBegin; 67 #if defined (USE_CTABLE) 68 ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 69 for ( i=0; i<nbs; i++ ){ 70 ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 71 } 72 #else 73 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 74 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 75 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 76 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 77 #endif 78 PetscFunctionReturn(0); 79 } 80 81 #define CHUNKSIZE 10 82 83 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 84 { \ 85 \ 86 brow = row/bs; \ 87 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 88 rmax = aimax[brow]; nrow = ailen[brow]; \ 89 bcol = col/bs; \ 90 ridx = row % bs; cidx = col % bs; \ 91 low = 0; high = nrow; \ 92 while (high-low > 3) { \ 93 t = (low+high)/2; \ 94 if (rp[t] > bcol) high = t; \ 95 else low = t; \ 96 } \ 97 for ( _i=low; _i<high; _i++ ) { \ 98 if (rp[_i] > bcol) break; \ 99 if (rp[_i] == bcol) { \ 100 bap = ap + bs2*_i + bs*cidx + ridx; \ 101 if (addv == ADD_VALUES) *bap += value; \ 102 else *bap = value; \ 103 goto a_noinsert; \ 104 } \ 105 } \ 106 if (a->nonew == 1) goto a_noinsert; \ 107 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 108 if (nrow >= rmax) { \ 109 /* there is no extra room in row, therefore enlarge */ \ 110 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 111 Scalar *new_a; \ 112 \ 113 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 114 \ 115 /* malloc new storage space */ \ 
      len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      /* single malloc: values, then column indices, then row pointers */ \
      new_j   = (int *) (new_a + bs2*new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      /* the CHUNKSIZE gap opened in this row starts out zeroed */ \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
  ailen[brow] = nrow; \
}

/*
   Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but operating on
   the off-diagonal block b of the local matrix (arrays bi/bj/ba etc.).
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + bs2*new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      PetscFree(b->a); \
if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 210 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 211 b->singlemalloc = 1; \ 212 \ 213 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 214 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 215 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 216 b->maxnz += bs2*CHUNKSIZE; \ 217 b->reallocs++; \ 218 b->nz++; \ 219 } \ 220 N = nrow++ - 1; \ 221 /* shift up all the later entries in this row */ \ 222 for ( ii=N; ii>=_i; ii-- ) { \ 223 rp[ii+1] = rp[ii]; \ 224 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 225 } \ 226 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 227 rp[_i] = bcol; \ 228 ap[bs2*_i + bs*cidx + ridx] = value; \ 229 b_noinsert:; \ 230 bilen[brow] = nrow; \ 231 } 232 233 #undef __FUNC__ 234 #define __FUNC__ "MatSetValues_MPIBAIJ" 235 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 236 { 237 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 238 Scalar value; 239 int ierr,i,j,row,col; 240 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 241 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 242 int cend_orig=baij->cend_bs,bs=baij->bs; 243 244 /* Some Variables required in the macro */ 245 Mat A = baij->A; 246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 247 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 248 Scalar *aa=a->a; 249 250 Mat B = baij->B; 251 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 252 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 253 Scalar *ba=b->a; 254 255 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 256 int low,high,t,ridx,cidx,bs2=a->bs2; 257 Scalar *ap,*bap; 258 259 PetscFunctionBegin; 260 for ( i=0; i<m; i++ ) { 261 if (im[i] < 0) continue; 262 #if defined(USE_PETSC_BOPT_g) 263 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 264 #endif 265 if (im[i] >= rstart_orig && im[i] < rend_orig) { 266 row 
= im[i] - rstart_orig; 267 for ( j=0; j<n; j++ ) { 268 if (in[j] >= cstart_orig && in[j] < cend_orig){ 269 col = in[j] - cstart_orig; 270 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 271 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 272 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 273 } else if (in[j] < 0) continue; 274 #if defined(USE_PETSC_BOPT_g) 275 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 276 #endif 277 else { 278 if (mat->was_assembled) { 279 if (!baij->colmap) { 280 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 281 } 282 #if defined (USE_CTABLE) 283 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr); 284 col = col - 1 + in[j]%bs; 285 #else 286 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 287 #endif 288 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 289 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 290 col = in[j]; 291 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 292 B = baij->B; 293 b = (Mat_SeqBAIJ *) (B)->data; 294 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 295 ba=b->a; 296 } 297 } else col = in[j]; 298 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 299 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 300 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 301 } 302 } 303 } else { 304 if ( !baij->donotstash) { 305 if (roworiented) { 306 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 307 } else { 308 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 309 } 310 } 311 } 312 } 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNC__ 317 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 318 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 319 { 320 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 321 Scalar *value,*barray=baij->barray; 322 int 
ierr,i,j,ii,jj,row,col; 323 int roworiented = baij->roworiented,rstart=baij->rstart ; 324 int rend=baij->rend,cstart=baij->cstart,stepval; 325 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 326 327 if(!barray) { 328 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 329 } 330 331 if (roworiented) { 332 stepval = (n-1)*bs; 333 } else { 334 stepval = (m-1)*bs; 335 } 336 for ( i=0; i<m; i++ ) { 337 if (im[i] < 0) continue; 338 #if defined(USE_PETSC_BOPT_g) 339 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 340 #endif 341 if (im[i] >= rstart && im[i] < rend) { 342 row = im[i] - rstart; 343 for ( j=0; j<n; j++ ) { 344 /* If NumCol = 1 then a copy is not required */ 345 if ((roworiented) && (n == 1)) { 346 barray = v + i*bs2; 347 } else if((!roworiented) && (m == 1)) { 348 barray = v + j*bs2; 349 } else { /* Here a copy is required */ 350 if (roworiented) { 351 value = v + i*(stepval+bs)*bs + j*bs; 352 } else { 353 value = v + j*(stepval+bs)*bs + i*bs; 354 } 355 for ( ii=0; ii<bs; ii++,value+=stepval ) { 356 for (jj=0; jj<bs; jj++ ) { 357 *barray++ = *value++; 358 } 359 } 360 barray -=bs2; 361 } 362 363 if (in[j] >= cstart && in[j] < cend){ 364 col = in[j] - cstart; 365 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 366 } 367 else if (in[j] < 0) continue; 368 #if defined(USE_PETSC_BOPT_g) 369 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 370 #endif 371 else { 372 if (mat->was_assembled) { 373 if (!baij->colmap) { 374 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 375 } 376 377 #if defined(USE_PETSC_BOPT_g) 378 #if defined (USE_CTABLE) 379 { int data; 380 ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 381 if((data - 1) % bs) 382 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 383 } 384 #else 385 if ((baij->colmap[in[j]] - 1) % bs) 
              {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* unknown off-diagonal block column: revert to unassembled state */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash into the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Multiplicative hashing with the golden-ratio constant; tmp is a
   caller-supplied double temporary used to take the fractional part. */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Scalar-indexed insertion used once the hash
   table (built by MatCreateHashTable_MPIBAIJ_Private) is active.  Each
   (block row, block col) pair maps through the table straight to the
   address of its bs2 values; new nonzero locations cannot be created.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping to the front of the table */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed insertion through the
   hash table; each table hit yields the address of the bs2 values of a
   block, into which the dense input block is added/copied directly.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* stored blocks are column-major: jj walks one stored column */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves values by global scalar indices.  Only
   locally owned rows are supported; columns in the diagonal range come
   from baij->A, others are looked up through colmap/garray in baij->B
   (absent entries read as 0.0).
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap miss, or stale entry pointing at a different block column */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes a matrix norm; only NORM_FROBENIUS is
   supported in parallel (local sums of squares reduced with MPI_SUM).
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table 
  This table is created only once. 
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
701 */ 702 #undef __FUNC__ 703 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 704 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 705 { 706 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 707 Mat A = baij->A, B=baij->B; 708 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 709 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 710 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 711 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 712 int *HT,key; 713 Scalar **HD; 714 double tmp; 715 #if defined(USE_PETSC_BOPT_g) 716 int ct=0,max=0; 717 #endif 718 719 PetscFunctionBegin; 720 baij->ht_size=(int)(factor*nz); 721 size = baij->ht_size; 722 723 if (baij->ht) { 724 PetscFunctionReturn(0); 725 } 726 727 /* Allocate Memory for Hash Table */ 728 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 729 baij->ht = (int*)(baij->hd + size); 730 HD = baij->hd; 731 HT = baij->ht; 732 733 734 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 735 736 737 /* Loop Over A */ 738 for ( i=0; i<a->mbs; i++ ) { 739 for ( j=ai[i]; j<ai[i+1]; j++ ) { 740 row = i+rstart; 741 col = aj[j]+cstart; 742 743 key = row*Nbs + col + 1; 744 h1 = HASH(size,key,tmp); 745 for ( k=0; k<size; k++ ){ 746 if (HT[(h1+k)%size] == 0.0) { 747 HT[(h1+k)%size] = key; 748 HD[(h1+k)%size] = a->a + j*bs2; 749 break; 750 #if defined(USE_PETSC_BOPT_g) 751 } else { 752 ct++; 753 #endif 754 } 755 } 756 #if defined(USE_PETSC_BOPT_g) 757 if (k> max) max = k; 758 #endif 759 } 760 } 761 /* Loop Over B */ 762 for ( i=0; i<b->mbs; i++ ) { 763 for ( j=bi[i]; j<bi[i+1]; j++ ) { 764 row = i+rstart; 765 col = garray[bj[j]]; 766 key = row*Nbs + col + 1; 767 h1 = HASH(size,key,tmp); 768 for ( k=0; k<size; k++ ){ 769 if (HT[(h1+k)%size] == 0.0) { 770 HT[(h1+k)%size] = key; 771 HD[(h1+k)%size] = b->a + j*bs2; 772 break; 773 #if defined(USE_PETSC_BOPT_g) 774 } else { 775 ct++; 776 #endif 777 } 778 } 779 
#if defined(USE_PETSC_BOPT_g) 780 if (k> max) max = k; 781 #endif 782 } 783 } 784 785 /* Print Summary */ 786 #if defined(USE_PETSC_BOPT_g) 787 for ( i=0,j=0; i<size; i++) 788 if (HT[i]) {j++;} 789 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 790 (j== 0)? 0.0:((double)(ct+j))/j,max); 791 #endif 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNC__ 796 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 797 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 800 int ierr,nstash,reallocs; 801 InsertMode addv; 802 803 PetscFunctionBegin; 804 if (baij->donotstash) { 805 PetscFunctionReturn(0); 806 } 807 808 /* make sure all processors are either in INSERTMODE or ADDMODE */ 809 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 810 if (addv == (ADD_VALUES|INSERT_VALUES)) { 811 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 812 } 813 mat->insertmode = addv; /* in case this processor had no cache */ 814 815 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 816 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 817 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 818 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 819 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 820 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNC__ 825 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 826 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 827 { 828 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 829 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 830 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 831 int 
*row,*col,other_disassembled,r1,r2,r3; 832 Scalar *val; 833 InsertMode addv = mat->insertmode; 834 835 PetscFunctionBegin; 836 if (!baij->donotstash) { 837 while (1) { 838 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 839 if (!flg) break; 840 841 for ( i=0; i<n; ) { 842 /* Now identify the consecutive vals belonging to the same row */ 843 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 844 if (j < n) ncols = j-i; 845 else ncols = n-i; 846 /* Now assemble all these values with a single function call */ 847 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 848 i = j; 849 } 850 } 851 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 852 /* Now process the block-stash. Since the values are stashed column-oriented, 853 set the roworiented flag to column oriented, and after MatSetValues() 854 restore the original flags */ 855 r1 = baij->roworiented; 856 r2 = a->roworiented; 857 r3 = b->roworiented; 858 baij->roworiented = 0; 859 a->roworiented = 0; 860 b->roworiented = 0; 861 while (1) { 862 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 863 if (!flg) break; 864 865 for ( i=0; i<n; ) { 866 /* Now identify the consecutive vals belonging to the same row */ 867 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 868 if (j < n) ncols = j-i; 869 else ncols = n-i; 870 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 871 i = j; 872 } 873 } 874 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 875 baij->roworiented = r1; 876 a->roworiented = r2; 877 b->roworiented = r3; 878 } 879 880 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 881 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 882 883 /* determine if any processor has disassembled, if so we must 884 also disassemble ourselfs, in order that we may reassemble. 
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* lazily build the hash table and switch the SetValues kernels to the
     hash-table versions for subsequent insertions */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIBAIJ_Binary"
/* Binary viewing is only supported when the matrix lives on one process. */
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewing for ASCII/draw/socket
   viewers.  Info formats print per-rank statistics; otherwise the whole
   matrix is gathered onto rank 0 and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* serialize output so each rank prints its statistics in order */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor.
*/ 981 Mat A; 982 Mat_SeqBAIJ *Aloc; 983 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 984 int mbs = baij->mbs; 985 Scalar *a; 986 987 if (!rank) { 988 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 989 } else { 990 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 991 } 992 PLogObjectParent(mat,A); 993 994 /* copy over the A part */ 995 Aloc = (Mat_SeqBAIJ*) baij->A->data; 996 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 997 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 998 999 for ( i=0; i<mbs; i++ ) { 1000 rvals[0] = bs*(baij->rstart + i); 1001 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1002 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1003 col = (baij->cstart+aj[j])*bs; 1004 for (k=0; k<bs; k++ ) { 1005 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1006 col++; a += bs; 1007 } 1008 } 1009 } 1010 /* copy over the B part */ 1011 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1012 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1013 for ( i=0; i<mbs; i++ ) { 1014 rvals[0] = bs*(baij->rstart + i); 1015 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1016 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1017 col = baij->garray[aj[j]]*bs; 1018 for (k=0; k<bs; k++ ) { 1019 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1020 col++; a += bs; 1021 } 1022 } 1023 } 1024 PetscFree(rvals); 1025 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1026 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1027 /* 1028 Everyone has to call to draw the matrix since the graphics waits are 1029 synchronized across all processors that share the Draw object 1030 */ 1031 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1032 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1033 } 1034 ierr = MatDestroy(A);CHKERRQ(ierr); 1035 } 1036 PetscFunctionReturn(0); 1037 } 1038 1039 1040 1041 #undef __FUNC__ 1042 #define __FUNC__ 
"MatView_MPIBAIJ" 1043 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1044 { 1045 int ierr; 1046 ViewerType vtype; 1047 1048 PetscFunctionBegin; 1049 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1050 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1051 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1052 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1053 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1054 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1055 } else { 1056 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNC__ 1062 #define __FUNC__ "MatDestroy_MPIBAIJ" 1063 int MatDestroy_MPIBAIJ(Mat mat) 1064 { 1065 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1066 int ierr; 1067 1068 PetscFunctionBegin; 1069 if (--mat->refct > 0) PetscFunctionReturn(0); 1070 1071 if (mat->mapping) { 1072 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1073 } 1074 if (mat->bmapping) { 1075 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1076 } 1077 if (mat->rmap) { 1078 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1079 } 1080 if (mat->cmap) { 1081 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1082 } 1083 #if defined(USE_PETSC_LOG) 1084 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1085 #endif 1086 1087 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1088 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1089 1090 PetscFree(baij->rowners); 1091 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1092 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1093 #if defined (USE_CTABLE) 1094 if (baij->colmap) TableDelete(baij->colmap); 1095 #else 1096 if (baij->colmap) PetscFree(baij->colmap); 1097 #endif 1098 if (baij->garray) PetscFree(baij->garray); 1099 if (baij->lvec) VecDestroy(baij->lvec); 1100 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1101 if (baij->rowvalues) 
PetscFree(baij->rowvalues); 1102 if (baij->barray) PetscFree(baij->barray); 1103 if (baij->hd) PetscFree(baij->hd); 1104 PetscFree(baij); 1105 PLogObjectDestroy(mat); 1106 PetscHeaderDestroy(mat); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNC__ 1111 #define __FUNC__ "MatMult_MPIBAIJ" 1112 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1113 { 1114 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1115 int ierr, nt; 1116 1117 PetscFunctionBegin; 1118 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1119 if (nt != a->n) { 1120 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1121 } 1122 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1123 if (nt != a->m) { 1124 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1125 } 1126 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1127 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1128 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1129 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1130 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNC__ 1135 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1136 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1137 { 1138 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1143 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1144 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1145 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNC__ 1150 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1151 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1152 { 1153 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1154 int ierr; 1155 1156 PetscFunctionBegin; 
1157 /* do nondiagonal part */ 1158 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1159 /* send it on its way */ 1160 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1161 /* do local part */ 1162 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1163 /* receive remote parts: note this assumes the values are not actually */ 1164 /* inserted in yy until the next line, which is true for my implementation*/ 1165 /* but is not perhaps always true. */ 1166 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNC__ 1171 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1172 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1173 { 1174 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1175 int ierr; 1176 1177 PetscFunctionBegin; 1178 /* do nondiagonal part */ 1179 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1180 /* send it on its way */ 1181 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1182 /* do local part */ 1183 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1184 /* receive remote parts: note this assumes the values are not actually */ 1185 /* inserted in yy until the next line, which is true for my implementation*/ 1186 /* but is not perhaps always true. 
*/ 1187 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 /* 1192 This only works correctly for square matrices where the subblock A->A is the 1193 diagonal block 1194 */ 1195 #undef __FUNC__ 1196 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1197 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1198 { 1199 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1200 int ierr; 1201 1202 PetscFunctionBegin; 1203 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1204 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNC__ 1209 #define __FUNC__ "MatScale_MPIBAIJ" 1210 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1211 { 1212 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1213 int ierr; 1214 1215 PetscFunctionBegin; 1216 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1217 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNC__ 1222 #define __FUNC__ "MatGetSize_MPIBAIJ" 1223 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1224 { 1225 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1226 1227 PetscFunctionBegin; 1228 if (m) *m = mat->M; 1229 if (n) *n = mat->N; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNC__ 1234 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1235 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1236 { 1237 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1238 1239 PetscFunctionBegin; 1240 *m = mat->m; *n = mat->n; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1246 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1247 { 1248 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1249 1250 PetscFunctionBegin; 1251 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNC__ 1256 #define __FUNC__ "MatGetRow_MPIBAIJ" 1257 int MatGetRow_MPIBAIJ(Mat 
matin,int row,int *nz,int **idx,Scalar **v) 1258 { 1259 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1260 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1261 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1262 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1263 int *cmap, *idx_p,cstart = mat->cstart; 1264 1265 PetscFunctionBegin; 1266 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1267 mat->getrowactive = PETSC_TRUE; 1268 1269 if (!mat->rowvalues && (idx || v)) { 1270 /* 1271 allocate enough space to hold information from the longest row. 1272 */ 1273 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1274 int max = 1,mbs = mat->mbs,tmp; 1275 for ( i=0; i<mbs; i++ ) { 1276 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1277 if (max < tmp) { max = tmp; } 1278 } 1279 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1280 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1281 } 1282 1283 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1284 lrow = row - brstart; 1285 1286 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1287 if (!v) {pvA = 0; pvB = 0;} 1288 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1289 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1290 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1291 nztot = nzA + nzB; 1292 1293 cmap = mat->garray; 1294 if (v || idx) { 1295 if (nztot) { 1296 /* Sort by increasing column numbers, assuming A and B already sorted */ 1297 int imark = -1; 1298 if (v) { 1299 *v = v_p = mat->rowvalues; 1300 for ( i=0; i<nzB; i++ ) { 1301 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1302 else break; 1303 } 1304 imark = i; 1305 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1306 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1307 } 1308 if (idx) { 
1309 *idx = idx_p = mat->rowindices; 1310 if (imark > -1) { 1311 for ( i=0; i<imark; i++ ) { 1312 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1313 } 1314 } else { 1315 for ( i=0; i<nzB; i++ ) { 1316 if (cmap[cworkB[i]/bs] < cstart) 1317 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1318 else break; 1319 } 1320 imark = i; 1321 } 1322 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1323 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1324 } 1325 } else { 1326 if (idx) *idx = 0; 1327 if (v) *v = 0; 1328 } 1329 } 1330 *nz = nztot; 1331 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1332 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #undef __FUNC__ 1337 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1338 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1339 { 1340 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1341 1342 PetscFunctionBegin; 1343 if (baij->getrowactive == PETSC_FALSE) { 1344 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1345 } 1346 baij->getrowactive = PETSC_FALSE; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNC__ 1351 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1352 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1353 { 1354 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1355 1356 PetscFunctionBegin; 1357 *bs = baij->bs; 1358 PetscFunctionReturn(0); 1359 } 1360 1361 #undef __FUNC__ 1362 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1363 int MatZeroEntries_MPIBAIJ(Mat A) 1364 { 1365 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1366 int ierr; 1367 1368 PetscFunctionBegin; 1369 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1370 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNC__ 1375 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1376 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1377 { 1378 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *) matin->data; 1379 Mat A = a->A, B = a->B; 1380 int ierr; 1381 double isend[5], irecv[5]; 1382 1383 PetscFunctionBegin; 1384 info->block_size = (double)a->bs; 1385 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1386 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1387 isend[3] = info->memory; isend[4] = info->mallocs; 1388 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1389 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1390 isend[3] += info->memory; isend[4] += info->mallocs; 1391 if (flag == MAT_LOCAL) { 1392 info->nz_used = isend[0]; 1393 info->nz_allocated = isend[1]; 1394 info->nz_unneeded = isend[2]; 1395 info->memory = isend[3]; 1396 info->mallocs = isend[4]; 1397 } else if (flag == MAT_GLOBAL_MAX) { 1398 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1399 info->nz_used = irecv[0]; 1400 info->nz_allocated = irecv[1]; 1401 info->nz_unneeded = irecv[2]; 1402 info->memory = irecv[3]; 1403 info->mallocs = irecv[4]; 1404 } else if (flag == MAT_GLOBAL_SUM) { 1405 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1406 info->nz_used = irecv[0]; 1407 info->nz_allocated = irecv[1]; 1408 info->nz_unneeded = irecv[2]; 1409 info->memory = irecv[3]; 1410 info->mallocs = irecv[4]; 1411 } else { 1412 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1413 } 1414 info->rows_global = (double)a->M; 1415 info->columns_global = (double)a->N; 1416 info->rows_local = (double)a->m; 1417 info->columns_local = (double)a->N; 1418 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1419 info->fill_ratio_needed = 0; 1420 info->factor_mallocs = 0; 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNC__ 1425 #define __FUNC__ "MatSetOption_MPIBAIJ" 1426 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1427 { 1428 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1429 int ierr; 1430 1431 PetscFunctionBegin; 1432 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1433 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1434 op == MAT_COLUMNS_UNSORTED || 1435 op == MAT_COLUMNS_SORTED || 1436 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1437 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1438 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1439 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1440 } else if (op == MAT_ROW_ORIENTED) { 1441 a->roworiented = 1; 1442 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1443 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1444 } else if (op == MAT_ROWS_SORTED || 1445 op == MAT_ROWS_UNSORTED || 1446 op == MAT_SYMMETRIC || 1447 op == MAT_STRUCTURALLY_SYMMETRIC || 1448 op == MAT_YES_NEW_DIAGONALS || 1449 op == MAT_USE_HASH_TABLE) { 1450 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1451 } else if (op == MAT_COLUMN_ORIENTED) { 1452 a->roworiented = 0; 1453 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1454 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1455 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1456 a->donotstash = 1; 1457 } else if (op == MAT_NO_NEW_DIAGONALS) { 1458 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1459 } else if (op == MAT_USE_HASH_TABLE) { 1460 a->ht_flag = 1; 1461 } else { 1462 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNC__ 1468 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1469 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1470 { 1471 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1472 Mat_SeqBAIJ *Aloc; 1473 Mat B; 1474 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1475 int bs=baij->bs,mbs=baij->mbs; 1476 Scalar *a; 1477 1478 PetscFunctionBegin; 1479 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1480 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1481 CHKERRQ(ierr); 1482 1483 /* copy over the A part */ 1484 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1485 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1486 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1487 1488 for ( i=0; i<mbs; i++ ) { 1489 rvals[0] = bs*(baij->rstart + i); 1490 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1491 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1492 col = (baij->cstart+aj[j])*bs; 1493 for (k=0; k<bs; k++ ) { 1494 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1495 col++; a += bs; 1496 } 1497 } 1498 } 1499 /* copy over the B part */ 1500 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1501 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1502 for ( i=0; i<mbs; i++ ) { 1503 rvals[0] = bs*(baij->rstart + i); 1504 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1505 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1506 col = baij->garray[aj[j]]*bs; 1507 for (k=0; k<bs; k++ ) { 1508 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1509 col++; a += bs; 1510 } 1511 } 1512 } 1513 PetscFree(rvals); 1514 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1515 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1516 1517 if (matout != PETSC_NULL) { 1518 *matout = B; 1519 } else { 1520 PetscOps *Abops; 1521 MatOps Aops; 1522 1523 /* This isn't really an in-place transpose .... but free data structures from baij */ 1524 PetscFree(baij->rowners); 1525 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1526 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1527 #if defined (USE_CTABLE) 1528 if (baij->colmap) TableDelete(baij->colmap); 1529 #else 1530 if (baij->colmap) PetscFree(baij->colmap); 1531 #endif 1532 if (baij->garray) PetscFree(baij->garray); 1533 if (baij->lvec) VecDestroy(baij->lvec); 1534 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1535 PetscFree(baij); 1536 1537 /* 1538 This is horrible, horrible code. We need to keep the 1539 A pointers for the bops and ops but copy everything 1540 else from C. 
1541 */ 1542 Abops = A->bops; 1543 Aops = A->ops; 1544 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1545 A->bops = Abops; 1546 A->ops = Aops; 1547 1548 PetscHeaderDestroy(B); 1549 } 1550 PetscFunctionReturn(0); 1551 } 1552 1553 #undef __FUNC__ 1554 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1555 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1556 { 1557 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1558 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1559 int ierr,s1,s2,s3; 1560 1561 PetscFunctionBegin; 1562 if (ll) { 1563 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1564 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 1565 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1566 ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr); 1567 ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr); 1568 } 1569 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNC__ 1574 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1575 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1576 { 1577 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1578 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1579 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1580 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1581 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1582 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1583 MPI_Comm comm = A->comm; 1584 MPI_Request *send_waits,*recv_waits; 1585 MPI_Status recv_status,*send_status; 1586 IS istmp; 1587 1588 PetscFunctionBegin; 1589 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1590 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1591 1592 /* first count number of contributors to each processor */ 1593 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 1594 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1595 procs = nprocs + size; 1596 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1597 for ( i=0; 
i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* linear search over the block-row ownership ranges (owners[] is in
       block rows, the indices in is are point rows, hence the *bs) */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];  /* how many processors will send to me */
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];  /* longest message I will receive */
  PetscFree(work);

  /* post receives:  */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination (this advances starts[]) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] since the bucketing pass above consumed it */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank]*bs;  /* first global point row owned by this processor */

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;  /* convert to local point row */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

        Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;  /* back to a global point row */
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* Print the runtime options for this format; the body runs once per program
   (static `called' flag), the SeqBAIJ help only on processor 0. */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/* Compare the diagonal and off-diagonal blocks locally, then AND the
   results across the communicator so every processor gets the same answer. */
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIBAIJ; entries correspond in order to the slots
   of struct _MatOps, with 0 marking an unsupported operation. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Hand back the local diagonal block directly (no copy is made). */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.
d_nz - number of block nonzeros per block row in diagonal portion of local 1883 submatrix (same for all local rows) 1884 . d_nnz - array containing the number of block nonzeros in the various block rows 1885 of the in diagonal portion of the local (possibly different for each block 1886 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1887 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1888 submatrix (same for all local rows). 1889 - o_nnz - array containing the number of nonzeros in the various block rows of the 1890 off-diagonal portion of the local submatrix (possibly different for 1891 each block row) or PETSC_NULL. 1892 1893 Output Parameter: 1894 . A - the matrix 1895 1896 Options Database Keys: 1897 . -mat_no_unroll - uses code that does not unroll the loops in the 1898 block calculations (much slower) 1899 . -mat_block_size - size of the blocks to use 1900 . -mat_mpi - use the parallel matrix data structures even on one processor 1901 (defaults to using SeqBAIJ format on one processor) 1902 1903 Notes: 1904 The user MUST specify either the local or global matrix dimensions 1905 (possibly both). 1906 1907 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1908 than it must be used on all processors that share the object for that argument. 1909 1910 Storage Information: 1911 For a square global matrix we define each processor's diagonal portion 1912 to be its local rows and the corresponding columns (a square submatrix); 1913 each processor's off-diagonal portion encompasses the remainder of the 1914 local matrix (a rectangular submatrix). 1915 1916 The user can specify preallocated storage for the diagonal part of 1917 the local submatrix with either d_nz or d_nnz (not both). Set 1918 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1919 memory allocation. 
Likewise, specify preallocated storage for the 1920 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1921 1922 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1923 the figure below we depict these three local rows and all columns (0-11). 1924 1925 .vb 1926 0 1 2 3 4 5 6 7 8 9 10 11 1927 ------------------- 1928 row 3 | o o o d d d o o o o o o 1929 row 4 | o o o d d d o o o o o o 1930 row 5 | o o o d d d o o o o o o 1931 ------------------- 1932 .ve 1933 1934 Thus, any entries in the d locations are stored in the d (diagonal) 1935 submatrix, and any entries in the o locations are stored in the 1936 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1937 stored simply in the MATSEQBAIJ format for compressed row storage. 1938 1939 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1940 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1941 In general, for PDE problems in which most nonzeros are near the diagonal, 1942 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1943 or you will get TERRIBLE performance; see the users' manual chapter on 1944 matrices. 
1945 1946 Level: intermediate 1947 1948 .keywords: matrix, block, aij, compressed row, sparse, parallel 1949 1950 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1951 @*/ 1952 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1953 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1954 { 1955 Mat B; 1956 Mat_MPIBAIJ *b; 1957 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1958 int flag1 = 0,flag2 = 0; 1959 1960 PetscFunctionBegin; 1961 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1962 1963 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1964 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 1965 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1966 if (!flag1 && !flag2 && size == 1) { 1967 if (M == PETSC_DECIDE) M = m; 1968 if (N == PETSC_DECIDE) N = n; 1969 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 *A = 0; 1974 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1975 PLogObjectCreate(B); 1976 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 1977 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1978 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1979 1980 B->ops->destroy = MatDestroy_MPIBAIJ; 1981 B->ops->view = MatView_MPIBAIJ; 1982 B->mapping = 0; 1983 B->factor = 0; 1984 B->assembled = PETSC_FALSE; 1985 1986 B->insertmode = NOT_SET_VALUES; 1987 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1988 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 1989 1990 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1991 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1992 } 1993 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1994 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or 
m should be specified"); 1995 } 1996 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1997 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1998 } 1999 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2000 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2001 2002 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2003 work[0] = m; work[1] = n; 2004 mbs = m/bs; nbs = n/bs; 2005 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2006 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2007 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2008 } 2009 if (m == PETSC_DECIDE) { 2010 Mbs = M/bs; 2011 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2012 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2013 m = mbs*bs; 2014 } 2015 if (n == PETSC_DECIDE) { 2016 Nbs = N/bs; 2017 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2018 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2019 n = nbs*bs; 2020 } 2021 if (mbs*bs != m || nbs*bs != n) { 2022 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2023 } 2024 2025 b->m = m; B->m = m; 2026 b->n = n; B->n = n; 2027 b->N = N; B->N = N; 2028 b->M = M; B->M = M; 2029 b->bs = bs; 2030 b->bs2 = bs*bs; 2031 b->mbs = mbs; 2032 b->nbs = nbs; 2033 b->Mbs = Mbs; 2034 b->Nbs = Nbs; 2035 2036 /* the information in the maps duplicates the information computed below, eventually 2037 we should remove the duplicate information that is not contained in the maps */ 2038 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2039 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2040 2041 /* build local table of row and column ownerships */ 2042 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2043 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2044 b->cowners = b->rowners + 
b->size + 2; 2045 b->rowners_bs = b->cowners + b->size + 2; 2046 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2047 b->rowners[0] = 0; 2048 for ( i=2; i<=b->size; i++ ) { 2049 b->rowners[i] += b->rowners[i-1]; 2050 } 2051 for ( i=0; i<=b->size; i++ ) { 2052 b->rowners_bs[i] = b->rowners[i]*bs; 2053 } 2054 b->rstart = b->rowners[b->rank]; 2055 b->rend = b->rowners[b->rank+1]; 2056 b->rstart_bs = b->rstart * bs; 2057 b->rend_bs = b->rend * bs; 2058 2059 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2060 b->cowners[0] = 0; 2061 for ( i=2; i<=b->size; i++ ) { 2062 b->cowners[i] += b->cowners[i-1]; 2063 } 2064 b->cstart = b->cowners[b->rank]; 2065 b->cend = b->cowners[b->rank+1]; 2066 b->cstart_bs = b->cstart * bs; 2067 b->cend_bs = b->cend * bs; 2068 2069 2070 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2071 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2072 PLogObjectParent(B,b->A); 2073 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2074 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2075 PLogObjectParent(B,b->B); 2076 2077 /* build cache for off array entries formed */ 2078 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2079 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2080 b->donotstash = 0; 2081 b->colmap = 0; 2082 b->garray = 0; 2083 b->roworiented = 1; 2084 2085 /* stuff used in block assembly */ 2086 b->barray = 0; 2087 2088 /* stuff used for matrix vector multiply */ 2089 b->lvec = 0; 2090 b->Mvctx = 0; 2091 2092 /* stuff for MatGetRow() */ 2093 b->rowindices = 0; 2094 b->rowvalues = 0; 2095 b->getrowactive = PETSC_FALSE; 2096 2097 /* hash table stuff */ 2098 b->ht = 0; 2099 b->hd = 0; 2100 b->ht_size = 0; 2101 b->ht_flag = 0; 2102 b->ht_fact = 0; 2103 b->ht_total_ct = 0; 2104 b->ht_insert_ct = 0; 2105 2106 *A = B; 2107 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2108 if (flg) 
{ 2109 double fact = 1.39; 2110 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2111 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2112 if (fact <= 1.0) fact = 1.39; 2113 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2114 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2115 } 2116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2117 "MatStoreValues_MPIBAIJ", 2118 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2120 "MatRetrieveValues_MPIBAIJ", 2121 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2123 "MatGetDiagonalBlock_MPIBAIJ", 2124 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNC__ 2129 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2130 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2131 { 2132 Mat mat; 2133 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2134 int ierr, len=0, flg; 2135 2136 PetscFunctionBegin; 2137 *newmat = 0; 2138 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2139 PLogObjectCreate(mat); 2140 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2141 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2142 mat->ops->destroy = MatDestroy_MPIBAIJ; 2143 mat->ops->view = MatView_MPIBAIJ; 2144 mat->factor = matin->factor; 2145 mat->assembled = PETSC_TRUE; 2146 mat->insertmode = NOT_SET_VALUES; 2147 2148 a->m = mat->m = oldmat->m; 2149 a->n = mat->n = oldmat->n; 2150 a->M = mat->M = oldmat->M; 2151 a->N = mat->N = oldmat->N; 2152 2153 a->bs = oldmat->bs; 2154 a->bs2 = oldmat->bs2; 2155 a->mbs = oldmat->mbs; 2156 a->nbs = oldmat->nbs; 2157 a->Mbs = oldmat->Mbs; 2158 a->Nbs = oldmat->Nbs; 2159 2160 a->rstart 
= oldmat->rstart; 2161 a->rend = oldmat->rend; 2162 a->cstart = oldmat->cstart; 2163 a->cend = oldmat->cend; 2164 a->size = oldmat->size; 2165 a->rank = oldmat->rank; 2166 a->donotstash = oldmat->donotstash; 2167 a->roworiented = oldmat->roworiented; 2168 a->rowindices = 0; 2169 a->rowvalues = 0; 2170 a->getrowactive = PETSC_FALSE; 2171 a->barray = 0; 2172 a->rstart_bs = oldmat->rstart_bs; 2173 a->rend_bs = oldmat->rend_bs; 2174 a->cstart_bs = oldmat->cstart_bs; 2175 a->cend_bs = oldmat->cend_bs; 2176 2177 /* hash table stuff */ 2178 a->ht = 0; 2179 a->hd = 0; 2180 a->ht_size = 0; 2181 a->ht_flag = oldmat->ht_flag; 2182 a->ht_fact = oldmat->ht_fact; 2183 a->ht_total_ct = 0; 2184 a->ht_insert_ct = 0; 2185 2186 2187 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2188 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2189 a->cowners = a->rowners + a->size + 2; 2190 a->rowners_bs = a->cowners + a->size + 2; 2191 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2192 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2193 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2194 if (oldmat->colmap) { 2195 #if defined (USE_CTABLE) 2196 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2197 #else 2198 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2199 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2200 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2201 #endif 2202 } else a->colmap = 0; 2203 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2204 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2205 PLogObjectMemory(mat,len*sizeof(int)); 2206 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2207 } else a->garray = 0; 2208 2209 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2210 PLogObjectParent(mat,a->lvec); 2211 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2212 2213 PLogObjectParent(mat,a->Mvctx); 2214 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2215 PLogObjectParent(mat,a->A); 2216 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2217 PLogObjectParent(mat,a->B); 2218 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2219 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2220 if (flg) { 2221 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2222 } 2223 *newmat = mat; 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #include "sys.h" 2228 2229 #undef __FUNC__ 2230 #define __FUNC__ "MatLoad_MPIBAIJ" 2231 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2232 { 2233 Mat A; 2234 int i, nz, ierr, j,rstart, rend, fd; 2235 Scalar *vals,*buf; 2236 MPI_Comm comm = ((PetscObject)viewer)->comm; 2237 MPI_Status status; 2238 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2239 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2240 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2241 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2242 int dcount,kmax,k,nzcount,tmp; 2243 2244 PetscFunctionBegin; 2245 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2246 2247 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2249 if (!rank) { 2250 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2251 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2252 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2253 if (header[3] < 0) { 2254 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2255 } 2256 } 2257 2258 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2259 M = header[1]; N 
= header[2]; 2260 2261 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2262 2263 /* 2264 This code adds extra rows to make sure the number of rows is 2265 divisible by the blocksize 2266 */ 2267 Mbs = M/bs; 2268 extra_rows = bs - M + bs*(Mbs); 2269 if (extra_rows == bs) extra_rows = 0; 2270 else Mbs++; 2271 if (extra_rows &&!rank) { 2272 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2273 } 2274 2275 /* determine ownership of all rows */ 2276 mbs = Mbs/size + ((Mbs % size) > rank); 2277 m = mbs * bs; 2278 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2279 browners = rowners + size + 1; 2280 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2281 rowners[0] = 0; 2282 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2283 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2284 rstart = rowners[rank]; 2285 rend = rowners[rank+1]; 2286 2287 /* distribute row lengths to all processors */ 2288 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2289 if (!rank) { 2290 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 2291 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2292 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2293 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2294 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2295 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2296 PetscFree(sndcounts); 2297 } else { 2298 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2299 } 2300 2301 if (!rank) { 2302 /* calculate the number of nonzeros on each processor */ 2303 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2304 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2305 for ( i=0; i<size; i++ ) { 
2306 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2307 procsnz[i] += rowlengths[j]; 2308 } 2309 } 2310 PetscFree(rowlengths); 2311 2312 /* determine max buffer needed and allocate it */ 2313 maxnz = 0; 2314 for ( i=0; i<size; i++ ) { 2315 maxnz = PetscMax(maxnz,procsnz[i]); 2316 } 2317 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2318 2319 /* read in my part of the matrix column indices */ 2320 nz = procsnz[0]; 2321 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2322 mycols = ibuf; 2323 if (size == 1) nz -= extra_rows; 2324 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2325 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2326 2327 /* read in every ones (except the last) and ship off */ 2328 for ( i=1; i<size-1; i++ ) { 2329 nz = procsnz[i]; 2330 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2331 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2332 } 2333 /* read in the stuff for the last proc */ 2334 if ( size != 1 ) { 2335 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2336 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2337 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2338 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2339 } 2340 PetscFree(cols); 2341 } else { 2342 /* determine buffer space needed for message */ 2343 nz = 0; 2344 for ( i=0; i<m; i++ ) { 2345 nz += locrowlens[i]; 2346 } 2347 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2348 mycols = ibuf; 2349 /* receive message of column indices*/ 2350 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2351 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2352 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2353 } 2354 2355 /* loop over local rows, determining number of off diagonal entries */ 2356 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) 
);CHKPTRQ(dlens); 2357 odlens = dlens + (rend-rstart); 2358 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2359 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2360 masked1 = mask + Mbs; 2361 masked2 = masked1 + Mbs; 2362 rowcount = 0; nzcount = 0; 2363 for ( i=0; i<mbs; i++ ) { 2364 dcount = 0; 2365 odcount = 0; 2366 for ( j=0; j<bs; j++ ) { 2367 kmax = locrowlens[rowcount]; 2368 for ( k=0; k<kmax; k++ ) { 2369 tmp = mycols[nzcount++]/bs; 2370 if (!mask[tmp]) { 2371 mask[tmp] = 1; 2372 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2373 else masked1[dcount++] = tmp; 2374 } 2375 } 2376 rowcount++; 2377 } 2378 2379 dlens[i] = dcount; 2380 odlens[i] = odcount; 2381 2382 /* zero out the mask elements we set */ 2383 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2384 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2385 } 2386 2387 /* create our matrix */ 2388 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2389 A = *newmat; 2390 MatSetOption(A,MAT_COLUMNS_SORTED); 2391 2392 if (!rank) { 2393 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2394 /* read in my part of the matrix numerical values */ 2395 nz = procsnz[0]; 2396 vals = buf; 2397 mycols = ibuf; 2398 if (size == 1) nz -= extra_rows; 2399 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2400 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2401 2402 /* insert into matrix */ 2403 jj = rstart*bs; 2404 for ( i=0; i<m; i++ ) { 2405 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2406 mycols += locrowlens[i]; 2407 vals += locrowlens[i]; 2408 jj++; 2409 } 2410 /* read in other processors (except the last one) and ship out */ 2411 for ( i=1; i<size-1; i++ ) { 2412 nz = procsnz[i]; 2413 vals = buf; 2414 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2415 ierr = 
MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2416 } 2417 /* the last proc */ 2418 if ( size != 1 ){ 2419 nz = procsnz[i] - extra_rows; 2420 vals = buf; 2421 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2422 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2423 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2424 } 2425 PetscFree(procsnz); 2426 } else { 2427 /* receive numeric values */ 2428 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2429 2430 /* receive message of values*/ 2431 vals = buf; 2432 mycols = ibuf; 2433 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2434 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2435 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2436 2437 /* insert into matrix */ 2438 jj = rstart*bs; 2439 for ( i=0; i<m; i++ ) { 2440 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2441 mycols += locrowlens[i]; 2442 vals += locrowlens[i]; 2443 jj++; 2444 } 2445 } 2446 PetscFree(locrowlens); 2447 PetscFree(buf); 2448 PetscFree(ibuf); 2449 PetscFree(rowners); 2450 PetscFree(dlens); 2451 PetscFree(mask); 2452 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2453 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 2458 2459 #undef __FUNC__ 2460 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2461 /*@ 2462 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2463 2464 Input Parameters: 2465 . mat - the matrix 2466 . 
fact - factor 2467 2468 Collective on Mat 2469 2470 Level: advanced 2471 2472 Notes: 2473 This can also be set by the command line option: -mat_use_hash_table fact 2474 2475 .keywords: matrix, hashtable, factor, HT 2476 2477 .seealso: MatSetOption() 2478 @*/ 2479 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2480 { 2481 Mat_MPIBAIJ *baij; 2482 2483 PetscFunctionBegin; 2484 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2485 if (mat->type != MATMPIBAIJ) { 2486 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2487 } 2488 baij = (Mat_MPIBAIJ*) mat->data; 2489 baij->ht_fact = fact; 2490 PetscFunctionReturn(0); 2491 } 2492