#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.168 1999/05/04 20:32:41 balay Exp balay $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

/* Routines implemented elsewhere (setup, disassembly, and the SeqBAIJ kernels
   that operate on the local diagonal (A) and off-diagonal (B) blocks). */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   Saves the numerical values of both local submatrices (diagonal block A and
   off-diagonal block B) by delegating to MatStoreValues() on each.
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   Restores the numerical values previously saved with MatStoreValues_MPIBAIJ(),
   again delegating to the two local submatrices.
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* sparse table: key = global block column + 1, value = local point index + 1
     (the +1 shift keeps 0 available as the "not present" sentinel) */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length Nbs (global block columns) — not scalable */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block rows of space added when a row overflows */
#define CHUNKSIZE  10

/*
   Inlined insertion of a single (point) value into the diagonal submatrix A.
   Uses the macro-local variables declared in MatSetValues_MPIBAIJ: rp/ap point
   at the row's column indices and values, low/high bound a short binary search,
   and the row is enlarged by CHUNKSIZE blocks when full.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* coarse binary search, then linear scan for the block column */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value; \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar    *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = \
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        /* single allocation: values, then column indices, then row pointers */ \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows after brow shift by CHUNKSIZE */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                           aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Same insertion kernel as MatSetValues_SeqBAIJ_A_Private, but for the
   off-diagonal submatrix B (arrays bi/bj/ba/bimax/bilen, struct b).
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* coarse binary search, then linear scan for the block column */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value; \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar    *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows after brow shift by CHUNKSIZE */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                           ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) \
{PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Point-wise MatSetValues for the parallel BAIJ format: entries owned
   locally go into A (diagonal block) or B (off-diagonal block) through the
   inlined macros above; entries for other processes are stashed for the
   assembly phase.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative row indices are ignored */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] -
rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* entry lies in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* entry lies in the off-diagonal block B: translate the global
             column to B's local numbering via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B: fall back to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   Block-wise MatSetValues: im/in are BLOCK row/column indices and v holds
   m*n dense bs-by-bs blocks.  Local blocks go to A or B via
   MatSetValuesBlocked_SeqBAIJ; remote blocks are stashed in the block stash.
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int
ierr,i,j,ii,jj,row,col; 320 int roworiented = baij->roworiented,rstart=baij->rstart ; 321 int rend=baij->rend,cstart=baij->cstart,stepval; 322 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 323 324 if(!barray) { 325 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 326 } 327 328 if (roworiented) { 329 stepval = (n-1)*bs; 330 } else { 331 stepval = (m-1)*bs; 332 } 333 for ( i=0; i<m; i++ ) { 334 if (im[i] < 0) continue; 335 #if defined(USE_PETSC_BOPT_g) 336 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 337 #endif 338 if (im[i] >= rstart && im[i] < rend) { 339 row = im[i] - rstart; 340 for ( j=0; j<n; j++ ) { 341 /* If NumCol = 1 then a copy is not required */ 342 if ((roworiented) && (n == 1)) { 343 barray = v + i*bs2; 344 } else if((!roworiented) && (m == 1)) { 345 barray = v + j*bs2; 346 } else { /* Here a copy is required */ 347 if (roworiented) { 348 value = v + i*(stepval+bs)*bs + j*bs; 349 } else { 350 value = v + j*(stepval+bs)*bs + i*bs; 351 } 352 for ( ii=0; ii<bs; ii++,value+=stepval ) { 353 for (jj=0; jj<bs; jj++ ) { 354 *barray++ = *value++; 355 } 356 } 357 barray -=bs2; 358 } 359 360 if (in[j] >= cstart && in[j] < cend){ 361 col = in[j] - cstart; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 else if (in[j] < 0) continue; 365 #if defined(USE_PETSC_BOPT_g) 366 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 367 #endif 368 else { 369 if (mat->was_assembled) { 370 if (!baij->colmap) { 371 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 372 } 373 374 #if defined(USE_PETSC_BOPT_g) 375 #if defined (USE_CTABLE) 376 { int data; 377 ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 378 if((data - 1) % bs) 379 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 380 } 381 #else 382 if ((baij->colmap[in[j]] - 1) % bs) 
{SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not present in B: revert to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hashing constant (golden ratio fraction) */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Point-wise MatSetValues variant used once the hash table built by
   MatCreateHashTable_MPIBAIJ_Private() exists: each (block row, block col)
   pair maps through HT to a direct pointer (HD) into the stored block, so no
   search through the BAIJ row structure is needed.  New nonzero locations
   cannot be introduced through this path.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row <
rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build also counts probes for the assembly-time statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward from h1, wrapping once to the table start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned elsewhere: stash for the assembly communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Block-wise hash-table MatSetValues: im/in are block indices, v holds dense
   bs-by-bs blocks; HD[idx] points directly at the stored block's values.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int
h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride between consecutive rows of one block in the caller's array */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build counts probes for assembly-time statistics */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* stored blocks are column-major (jj walks a stored column),
             while the input block is row-major here */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs +
i*bs;
          /* column-oriented input matches the stored layout: copy bs columns */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned elsewhere: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves individual (point) entries.  Only rows owned by this process are
   supported; columns in the off-diagonal part are translated through colmap,
   and absent entries read as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* entry is in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* entry is in the off-diagonal block B */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data =
baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 or a stale colmap slot means the block is absent: 0.0 */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   Computes a matrix norm; only NORM_FROBENIUS is supported in parallel
   (sum of squares over A and B, MPI_Allreduce, then sqrt).  With a single
   process the request is forwarded to the sequential submatrix A.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
698 */ 699 #undef __FUNC__ 700 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 701 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 702 { 703 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 704 Mat A = baij->A, B=baij->B; 705 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 706 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 707 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 708 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 709 int *HT,key; 710 Scalar **HD; 711 double tmp; 712 #if defined(USE_PETSC_BOPT_g) 713 int ct=0,max=0; 714 #endif 715 716 PetscFunctionBegin; 717 baij->ht_size=(int)(factor*nz); 718 size = baij->ht_size; 719 720 if (baij->ht) { 721 PetscFunctionReturn(0); 722 } 723 724 /* Allocate Memory for Hash Table */ 725 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 726 baij->ht = (int*)(baij->hd + size); 727 HD = baij->hd; 728 HT = baij->ht; 729 730 731 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 732 733 734 /* Loop Over A */ 735 for ( i=0; i<a->mbs; i++ ) { 736 for ( j=ai[i]; j<ai[i+1]; j++ ) { 737 row = i+rstart; 738 col = aj[j]+cstart; 739 740 key = row*Nbs + col + 1; 741 h1 = HASH(size,key,tmp); 742 for ( k=0; k<size; k++ ){ 743 if (HT[(h1+k)%size] == 0.0) { 744 HT[(h1+k)%size] = key; 745 HD[(h1+k)%size] = a->a + j*bs2; 746 break; 747 #if defined(USE_PETSC_BOPT_g) 748 } else { 749 ct++; 750 #endif 751 } 752 } 753 #if defined(USE_PETSC_BOPT_g) 754 if (k> max) max = k; 755 #endif 756 } 757 } 758 /* Loop Over B */ 759 for ( i=0; i<b->mbs; i++ ) { 760 for ( j=bi[i]; j<bi[i+1]; j++ ) { 761 row = i+rstart; 762 col = garray[bj[j]]; 763 key = row*Nbs + col + 1; 764 h1 = HASH(size,key,tmp); 765 for ( k=0; k<size; k++ ){ 766 if (HT[(h1+k)%size] == 0.0) { 767 HT[(h1+k)%size] = key; 768 HD[(h1+k)%size] = b->a + j*bs2; 769 break; 770 #if defined(USE_PETSC_BOPT_g) 771 } else { 772 ct++; 773 #endif 774 } 775 } 776 
#if defined(USE_PETSC_BOPT_g) 777 if (k> max) max = k; 778 #endif 779 } 780 } 781 782 /* Print Summary */ 783 #if defined(USE_PETSC_BOPT_g) 784 for ( i=0,j=0; i<size; i++) 785 if (HT[i]) {j++;} 786 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 787 (j== 0)? 0.0:((double)(ct+j))/j,max); 788 #endif 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNC__ 793 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 794 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 795 { 796 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 797 int ierr,nstash,reallocs; 798 InsertMode addv; 799 800 PetscFunctionBegin; 801 if (baij->donotstash) { 802 PetscFunctionReturn(0); 803 } 804 805 /* make sure all processors are either in INSERTMODE or ADDMODE */ 806 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 807 if (addv == (ADD_VALUES|INSERT_VALUES)) { 808 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 809 } 810 mat->insertmode = addv; /* in case this processor had no cache */ 811 812 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 813 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 815 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 816 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 817 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNC__ 822 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 823 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 826 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 827 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 828 int 
*row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble.
*/
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD: result is 0 if ANY process has was_assembled == 0 */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once finally assembled, optionally switch to the hash-table insertion path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
/*
   Binary viewer: only the uniprocessor case is supported, in which case the
   diagonal submatrix A (the whole local matrix) is written out.
*/
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   ASCII/Draw/Socket viewer: short info formats are printed per process;
   otherwise the whole matrix is gathered onto process 0 and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,
format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* sequential phase: each process prints its local info in rank order */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor.
*/ 978 Mat A; 979 Mat_SeqBAIJ *Aloc; 980 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 981 int mbs = baij->mbs; 982 Scalar *a; 983 984 if (!rank) { 985 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 986 } else { 987 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 988 } 989 PLogObjectParent(mat,A); 990 991 /* copy over the A part */ 992 Aloc = (Mat_SeqBAIJ*) baij->A->data; 993 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 994 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 995 996 for ( i=0; i<mbs; i++ ) { 997 rvals[0] = bs*(baij->rstart + i); 998 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 999 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1000 col = (baij->cstart+aj[j])*bs; 1001 for (k=0; k<bs; k++ ) { 1002 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1003 col++; a += bs; 1004 } 1005 } 1006 } 1007 /* copy over the B part */ 1008 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1009 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1010 for ( i=0; i<mbs; i++ ) { 1011 rvals[0] = bs*(baij->rstart + i); 1012 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1013 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1014 col = baij->garray[aj[j]]*bs; 1015 for (k=0; k<bs; k++ ) { 1016 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1017 col++; a += bs; 1018 } 1019 } 1020 } 1021 PetscFree(rvals); 1022 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1023 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 /* 1025 Everyone has to call to draw the matrix since the graphics waits are 1026 synchronized across all processors that share the Draw object 1027 */ 1028 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1029 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1030 } 1031 ierr = MatDestroy(A);CHKERRQ(ierr); 1032 } 1033 PetscFunctionReturn(0); 1034 } 1035 1036 1037 1038 #undef __FUNC__ 1039 #define __FUNC__ 
"MatView_MPIBAIJ" 1040 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1041 { 1042 int ierr; 1043 ViewerType vtype; 1044 1045 PetscFunctionBegin; 1046 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1047 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1048 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 1049 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1050 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1051 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1052 } else { 1053 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1054 } 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNC__ 1059 #define __FUNC__ "MatDestroy_MPIBAIJ" 1060 int MatDestroy_MPIBAIJ(Mat mat) 1061 { 1062 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1063 int ierr; 1064 1065 PetscFunctionBegin; 1066 if (--mat->refct > 0) PetscFunctionReturn(0); 1067 1068 if (mat->mapping) { 1069 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1070 } 1071 if (mat->bmapping) { 1072 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1073 } 1074 if (mat->rmap) { 1075 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1076 } 1077 if (mat->cmap) { 1078 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1079 } 1080 #if defined(USE_PETSC_LOG) 1081 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1082 #endif 1083 1084 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1085 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1086 1087 PetscFree(baij->rowners); 1088 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1089 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1090 #if defined (USE_CTABLE) 1091 if (baij->colmap) TableDelete(baij->colmap); 1092 #else 1093 if (baij->colmap) PetscFree(baij->colmap); 1094 #endif 1095 if (baij->garray) PetscFree(baij->garray); 1096 if (baij->lvec) VecDestroy(baij->lvec); 1097 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1098 if (baij->rowvalues) 
PetscFree(baij->rowvalues); 1099 if (baij->barray) PetscFree(baij->barray); 1100 if (baij->hd) PetscFree(baij->hd); 1101 PetscFree(baij); 1102 PLogObjectDestroy(mat); 1103 PetscHeaderDestroy(mat); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNC__ 1108 #define __FUNC__ "MatMult_MPIBAIJ" 1109 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1110 { 1111 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1112 int ierr, nt; 1113 1114 PetscFunctionBegin; 1115 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1116 if (nt != a->n) { 1117 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1118 } 1119 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1120 if (nt != a->m) { 1121 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1122 } 1123 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1124 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1125 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1126 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1127 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNC__ 1132 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1133 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1134 { 1135 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1136 int ierr; 1137 1138 PetscFunctionBegin; 1139 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNC__ 1147 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1148 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1149 { 1150 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1151 int ierr; 1152 1153 PetscFunctionBegin; 
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   zz = A'*xx + yy: same structure as MatMultTrans_MPIBAIJ, with the
   local part accumulated via multtransadd.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* A = (*aa)*A: scale both the diagonal and off-diagonal sequential blocks. */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* Return the global (point, not block) dimensions. */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
/* Return the local (point, not block) dimensions. */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* Convert the block ownership range [rstart,rend) to point rows. */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Return one (global point) row, merging the rows of the diagonal (A) and
   off-diagonal (B) blocks into increasing global column order. Only rows
   owned by this processor may be requested.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the values and, behind them, the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark marks where the B (off-diagonal) columns cross cstart */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
1306 *idx = idx_p = mat->rowindices; 1307 if (imark > -1) { 1308 for ( i=0; i<imark; i++ ) { 1309 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1310 } 1311 } else { 1312 for ( i=0; i<nzB; i++ ) { 1313 if (cmap[cworkB[i]/bs] < cstart) 1314 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1315 else break; 1316 } 1317 imark = i; 1318 } 1319 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1320 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1321 } 1322 } else { 1323 if (idx) *idx = 0; 1324 if (v) *v = 0; 1325 } 1326 } 1327 *nz = nztot; 1328 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1329 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNC__ 1334 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1335 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1336 { 1337 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1338 1339 PetscFunctionBegin; 1340 if (baij->getrowactive == PETSC_FALSE) { 1341 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1342 } 1343 baij->getrowactive = PETSC_FALSE; 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNC__ 1348 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1349 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1350 { 1351 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1352 1353 PetscFunctionBegin; 1354 *bs = baij->bs; 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNC__ 1359 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1360 int MatZeroEntries_MPIBAIJ(Mat A) 1361 { 1362 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1363 int ierr; 1364 1365 PetscFunctionBegin; 1366 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1367 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNC__ 1372 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1373 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1374 { 1375 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *) matin->data; 1376 Mat A = a->A, B = a->B; 1377 int ierr; 1378 double isend[5], irecv[5]; 1379 1380 PetscFunctionBegin; 1381 info->block_size = (double)a->bs; 1382 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1383 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1384 isend[3] = info->memory; isend[4] = info->mallocs; 1385 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1386 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1387 isend[3] += info->memory; isend[4] += info->mallocs; 1388 if (flag == MAT_LOCAL) { 1389 info->nz_used = isend[0]; 1390 info->nz_allocated = isend[1]; 1391 info->nz_unneeded = isend[2]; 1392 info->memory = isend[3]; 1393 info->mallocs = isend[4]; 1394 } else if (flag == MAT_GLOBAL_MAX) { 1395 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1396 info->nz_used = irecv[0]; 1397 info->nz_allocated = irecv[1]; 1398 info->nz_unneeded = irecv[2]; 1399 info->memory = irecv[3]; 1400 info->mallocs = irecv[4]; 1401 } else if (flag == MAT_GLOBAL_SUM) { 1402 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1403 info->nz_used = irecv[0]; 1404 info->nz_allocated = irecv[1]; 1405 info->nz_unneeded = irecv[2]; 1406 info->memory = irecv[3]; 1407 info->mallocs = irecv[4]; 1408 } else { 1409 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1410 } 1411 info->rows_global = (double)a->M; 1412 info->columns_global = (double)a->N; 1413 info->rows_local = (double)a->m; 1414 info->columns_local = (double)a->N; 1415 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1416 info->fill_ratio_needed = 0; 1417 info->factor_mallocs = 0; 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNC__ 1422 #define __FUNC__ "MatSetOption_MPIBAIJ" 1423 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1424 { 1425 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1426 int ierr; 1427 1428 PetscFunctionBegin; 1429 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1430 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1431 op == MAT_COLUMNS_UNSORTED || 1432 op == MAT_COLUMNS_SORTED || 1433 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1434 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1435 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1436 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1437 } else if (op == MAT_ROW_ORIENTED) { 1438 a->roworiented = 1; 1439 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1441 } else if (op == MAT_ROWS_SORTED || 1442 op == MAT_ROWS_UNSORTED || 1443 op == MAT_SYMMETRIC || 1444 op == MAT_STRUCTURALLY_SYMMETRIC || 1445 op == MAT_YES_NEW_DIAGONALS || 1446 op == MAT_USE_HASH_TABLE) { 1447 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1448 } else if (op == MAT_COLUMN_ORIENTED) { 1449 a->roworiented = 0; 1450 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1451 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1452 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1453 a->donotstash = 1; 1454 } else if (op == MAT_NO_NEW_DIAGONALS) { 1455 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1456 } else if (op == MAT_USE_HASH_TABLE) { 1457 a->ht_flag = 1; 1458 } else { 1459 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNC__ 1465 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1466 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1467 { 1468 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1469 Mat_SeqBAIJ *Aloc; 1470 Mat B; 1471 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1472 int bs=baij->bs,mbs=baij->mbs; 1473 Scalar *a; 1474 1475 PetscFunctionBegin; 1476 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1477 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1478 CHKERRQ(ierr); 1479 1480 /* copy over the A part */ 1481 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1482 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1483 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1484 1485 for ( i=0; i<mbs; i++ ) { 1486 rvals[0] = bs*(baij->rstart + i); 1487 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1488 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1489 col = (baij->cstart+aj[j])*bs; 1490 for (k=0; k<bs; k++ ) { 1491 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1492 col++; a += bs; 1493 } 1494 } 1495 } 1496 /* copy over the B part */ 1497 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1498 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1499 for ( i=0; i<mbs; i++ ) { 1500 rvals[0] = bs*(baij->rstart + i); 1501 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1502 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1503 col = baij->garray[aj[j]]*bs; 1504 for (k=0; k<bs; k++ ) { 1505 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1506 col++; a += bs; 1507 } 1508 } 1509 } 1510 PetscFree(rvals); 1511 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1512 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1513 1514 if (matout != PETSC_NULL) { 1515 *matout = B; 1516 } else { 1517 PetscOps *Abops; 1518 MatOps Aops; 1519 1520 /* This isn't really an in-place transpose .... but free data structures from baij */ 1521 PetscFree(baij->rowners); 1522 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1523 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1524 #if defined (USE_CTABLE) 1525 if (baij->colmap) TableDelete(baij->colmap); 1526 #else 1527 if (baij->colmap) PetscFree(baij->colmap); 1528 #endif 1529 if (baij->garray) PetscFree(baij->garray); 1530 if (baij->lvec) VecDestroy(baij->lvec); 1531 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1532 PetscFree(baij); 1533 1534 /* 1535 This is horrible, horrible code. We need to keep the 1536 A pointers for the bops and ops but copy everything 1537 else from C. 
    */
    Abops = A->bops;
    Aops  = A->ops;
    /* overwrite A's header with B's, then restore A's function tables */
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   Left scaling (ll) only: both the diagonal and off-diagonal blocks share
   the same local row distribution. Right scaling is not supported.
*/
int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
{
  Mat a = ((Mat_MPIBAIJ *) A->data)->A;
  Mat b = ((Mat_MPIBAIJ *) A->data)->B;
  int ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
    ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr);
    ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr);
  }
  if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   Zero the (global point) rows listed in is, optionally placing diag on the
   diagonal. Rows owned by other processors are routed to their owners with
   a count/post/send/wait rendezvous, then each processor zeros its own rows.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] is in block rows; scale by bs to compare point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (the packing loop above advanced it) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* rectangular case: set each diagonal entry individually */
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* Print the runtime options understood by the MPIBAIJ format (once). */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* Only the diagonal block can be factored, so only it needs resetting. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   Compare the local A and B blocks of two MPIBAIJ matrices and reduce with
   a logical AND so all processors return the same answer.
*/
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  /* NOTE(review): assumes PetscTruth is int-sized for the MPI_INT reduce */
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIBAIJ matrix type; 0 marks unimplemented ops. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Return (by reference, no copy) the local diagonal block of the matrix. */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.  -mat_no_unroll - uses code that does not unroll the loops in the 
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
.  -mat_mpi - use the parallel matrix data structures even on one processor 
              (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve
  
   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
1942 1943 Level: intermediate 1944 1945 .keywords: matrix, block, aij, compressed row, sparse, parallel 1946 1947 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1948 @*/ 1949 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1950 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1951 { 1952 Mat B; 1953 Mat_MPIBAIJ *b; 1954 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1955 int flag1 = 0,flag2 = 0; 1956 1957 PetscFunctionBegin; 1958 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1959 1960 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1961 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 1962 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1963 if (!flag1 && !flag2 && size == 1) { 1964 if (M == PETSC_DECIDE) M = m; 1965 if (N == PETSC_DECIDE) N = n; 1966 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1967 PetscFunctionReturn(0); 1968 } 1969 1970 *A = 0; 1971 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1972 PLogObjectCreate(B); 1973 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 1974 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1975 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1976 1977 B->ops->destroy = MatDestroy_MPIBAIJ; 1978 B->ops->view = MatView_MPIBAIJ; 1979 B->mapping = 0; 1980 B->factor = 0; 1981 B->assembled = PETSC_FALSE; 1982 1983 B->insertmode = NOT_SET_VALUES; 1984 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1985 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 1986 1987 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1988 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1989 } 1990 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1991 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or 
m should be specified"); 1992 } 1993 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 1994 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 1995 } 1996 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1997 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 1998 1999 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2000 work[0] = m; work[1] = n; 2001 mbs = m/bs; nbs = n/bs; 2002 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2003 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2004 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2005 } 2006 if (m == PETSC_DECIDE) { 2007 Mbs = M/bs; 2008 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2009 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2010 m = mbs*bs; 2011 } 2012 if (n == PETSC_DECIDE) { 2013 Nbs = N/bs; 2014 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2015 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2016 n = nbs*bs; 2017 } 2018 if (mbs*bs != m || nbs*bs != n) { 2019 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2020 } 2021 2022 b->m = m; B->m = m; 2023 b->n = n; B->n = n; 2024 b->N = N; B->N = N; 2025 b->M = M; B->M = M; 2026 b->bs = bs; 2027 b->bs2 = bs*bs; 2028 b->mbs = mbs; 2029 b->nbs = nbs; 2030 b->Mbs = Mbs; 2031 b->Nbs = Nbs; 2032 2033 /* the information in the maps duplicates the information computed below, eventually 2034 we should remove the duplicate information that is not contained in the maps */ 2035 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2036 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2037 2038 /* build local table of row and column ownerships */ 2039 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2040 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2041 b->cowners = b->rowners + 
b->size + 2; 2042 b->rowners_bs = b->cowners + b->size + 2; 2043 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2044 b->rowners[0] = 0; 2045 for ( i=2; i<=b->size; i++ ) { 2046 b->rowners[i] += b->rowners[i-1]; 2047 } 2048 for ( i=0; i<=b->size; i++ ) { 2049 b->rowners_bs[i] = b->rowners[i]*bs; 2050 } 2051 b->rstart = b->rowners[b->rank]; 2052 b->rend = b->rowners[b->rank+1]; 2053 b->rstart_bs = b->rstart * bs; 2054 b->rend_bs = b->rend * bs; 2055 2056 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2057 b->cowners[0] = 0; 2058 for ( i=2; i<=b->size; i++ ) { 2059 b->cowners[i] += b->cowners[i-1]; 2060 } 2061 b->cstart = b->cowners[b->rank]; 2062 b->cend = b->cowners[b->rank+1]; 2063 b->cstart_bs = b->cstart * bs; 2064 b->cend_bs = b->cend * bs; 2065 2066 2067 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2068 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2069 PLogObjectParent(B,b->A); 2070 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2071 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2072 PLogObjectParent(B,b->B); 2073 2074 /* build cache for off array entries formed */ 2075 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2076 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2077 b->donotstash = 0; 2078 b->colmap = 0; 2079 b->garray = 0; 2080 b->roworiented = 1; 2081 2082 /* stuff used in block assembly */ 2083 b->barray = 0; 2084 2085 /* stuff used for matrix vector multiply */ 2086 b->lvec = 0; 2087 b->Mvctx = 0; 2088 2089 /* stuff for MatGetRow() */ 2090 b->rowindices = 0; 2091 b->rowvalues = 0; 2092 b->getrowactive = PETSC_FALSE; 2093 2094 /* hash table stuff */ 2095 b->ht = 0; 2096 b->hd = 0; 2097 b->ht_size = 0; 2098 b->ht_flag = 0; 2099 b->ht_fact = 0; 2100 b->ht_total_ct = 0; 2101 b->ht_insert_ct = 0; 2102 2103 *A = B; 2104 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2105 if (flg) 
{ 2106 double fact = 1.39; 2107 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2108 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2109 if (fact <= 1.0) fact = 1.39; 2110 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2111 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2112 } 2113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2114 "MatStoreValues_MPIBAIJ", 2115 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2117 "MatRetrieveValues_MPIBAIJ", 2118 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2120 "MatGetDiagonalBlock_MPIBAIJ", 2121 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 #undef __FUNC__ 2126 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2127 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2128 { 2129 Mat mat; 2130 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2131 int ierr, len=0, flg; 2132 2133 PetscFunctionBegin; 2134 *newmat = 0; 2135 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2136 PLogObjectCreate(mat); 2137 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2138 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2139 mat->ops->destroy = MatDestroy_MPIBAIJ; 2140 mat->ops->view = MatView_MPIBAIJ; 2141 mat->factor = matin->factor; 2142 mat->assembled = PETSC_TRUE; 2143 mat->insertmode = NOT_SET_VALUES; 2144 2145 a->m = mat->m = oldmat->m; 2146 a->n = mat->n = oldmat->n; 2147 a->M = mat->M = oldmat->M; 2148 a->N = mat->N = oldmat->N; 2149 2150 a->bs = oldmat->bs; 2151 a->bs2 = oldmat->bs2; 2152 a->mbs = oldmat->mbs; 2153 a->nbs = oldmat->nbs; 2154 a->Mbs = oldmat->Mbs; 2155 a->Nbs = oldmat->Nbs; 2156 2157 a->rstart 
= oldmat->rstart; 2158 a->rend = oldmat->rend; 2159 a->cstart = oldmat->cstart; 2160 a->cend = oldmat->cend; 2161 a->size = oldmat->size; 2162 a->rank = oldmat->rank; 2163 a->donotstash = oldmat->donotstash; 2164 a->roworiented = oldmat->roworiented; 2165 a->rowindices = 0; 2166 a->rowvalues = 0; 2167 a->getrowactive = PETSC_FALSE; 2168 a->barray = 0; 2169 a->rstart_bs = oldmat->rstart_bs; 2170 a->rend_bs = oldmat->rend_bs; 2171 a->cstart_bs = oldmat->cstart_bs; 2172 a->cend_bs = oldmat->cend_bs; 2173 2174 /* hash table stuff */ 2175 a->ht = 0; 2176 a->hd = 0; 2177 a->ht_size = 0; 2178 a->ht_flag = oldmat->ht_flag; 2179 a->ht_fact = oldmat->ht_fact; 2180 a->ht_total_ct = 0; 2181 a->ht_insert_ct = 0; 2182 2183 2184 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2185 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2186 a->cowners = a->rowners + a->size + 2; 2187 a->rowners_bs = a->cowners + a->size + 2; 2188 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2189 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2190 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2191 if (oldmat->colmap) { 2192 #if defined (USE_CTABLE) 2193 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2194 #else 2195 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2196 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2197 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2198 #endif 2199 } else a->colmap = 0; 2200 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2201 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2202 PLogObjectMemory(mat,len*sizeof(int)); 2203 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2204 } else a->garray = 0; 2205 2206 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2207 PLogObjectParent(mat,a->lvec); 2208 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2209 2210 PLogObjectParent(mat,a->Mvctx); 2211 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2212 PLogObjectParent(mat,a->A); 2213 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2214 PLogObjectParent(mat,a->B); 2215 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2216 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2217 if (flg) { 2218 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2219 } 2220 *newmat = mat; 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #include "sys.h" 2225 2226 #undef __FUNC__ 2227 #define __FUNC__ "MatLoad_MPIBAIJ" 2228 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2229 { 2230 Mat A; 2231 int i, nz, ierr, j,rstart, rend, fd; 2232 Scalar *vals,*buf; 2233 MPI_Comm comm = ((PetscObject)viewer)->comm; 2234 MPI_Status status; 2235 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2236 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2237 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2238 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2239 int dcount,kmax,k,nzcount,tmp; 2240 2241 PetscFunctionBegin; 2242 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2243 2244 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2245 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2246 if (!rank) { 2247 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2248 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2249 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2250 if (header[3] < 0) { 2251 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2252 } 2253 } 2254 2255 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2256 M = header[1]; N 
= header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /* 
     This code adds extra rows to make sure the number of rows is 
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);             /* 0 when M is already a multiple of bs */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: first (Mbs % size) ranks get one extra block row */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;              /* same table in scalar-row units */
  ierr     = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];                     /* NB: block-row units throughout */
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; padding rows get length 1
     (a single diagonal entry is inserted for them later) */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices; on one process the
       padding entries are not on disk, so read less and synthesize them */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;   /* pad with diagonal entries */
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already counted for the current block row,
     masked1/masked2 remember which marks to clear afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;           /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix with exact preallocation from the counts above */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* NOTE(review): return value of MatSetOption is not error-checked here */
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
    /* read in my part of the matrix numerical values; same padding logic
       as for the column indices above */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;                      /* first global (scalar) row owned here */
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc; note i == size-1 here, left over from the loop above */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;   /* unit diagonal for padding rows */
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}



#undef __FUNC__  
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.
fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  /* only meaningful for the parallel BAIJ implementation */
  if (mat->type != MATMPIBAIJ) {
    SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij = (Mat_MPIBAIJ*) mat->data;
  /* store the over-allocation factor; consumed when hash-table assembly is enabled */
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}