1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.172 1999/06/10 16:08:12 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 EXTERN_C_BEGIN 20 #undef __FUNC__ 21 #define __FUNC__ "MatStoreValues_MPIBAIJ" 22 int MatStoreValues_MPIBAIJ(Mat mat) 23 { 24 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 25 int ierr; 26 27 PetscFunctionBegin; 28 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 29 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 EXTERN_C_END 33 34 EXTERN_C_BEGIN 35 #undef __FUNC__ 36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ" 37 int MatRetrieveValues_MPIBAIJ(Mat mat) 38 { 39 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 40 int ierr; 41 42 PetscFunctionBegin; 43 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 44 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 45 PetscFunctionReturn(0); 46 } 47 EXTERN_C_END 48 49 /* 50 Local utility routine that creates a mapping from the global column 51 number to the local number in the off-diagonal part of the local 52 storage of the matrix. This is done in a non scable way since the 53 length of colmap equals the global matrix length. 54 */ 55 #undef __FUNC__ 56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 57 static int CreateColmap_MPIBAIJ_Private(Mat mat) 58 { 59 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 61 int nbs = B->nbs,i,bs=B->bs,ierr; 62 63 PetscFunctionBegin; 64 #if defined (PETSC_USE_CTABLE) 65 ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 66 for ( i=0; i<nbs; i++ ){ 67 ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 68 } 69 #else 70 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 71 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 72 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 73 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 74 #endif 75 PetscFunctionReturn(0); 76 } 77 78 #define CHUNKSIZE 10 79 80 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 81 { \ 82 \ 83 brow = row/bs; \ 84 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 85 rmax = aimax[brow]; nrow = ailen[brow]; \ 86 bcol = col/bs; \ 87 ridx = row % bs; cidx = col % bs; \ 88 low = 0; high = nrow; \ 89 while (high-low > 3) { \ 90 t = (low+high)/2; \ 91 if (rp[t] > bcol) high = t; \ 92 else low = t; \ 93 } \ 94 for ( _i=low; _i<high; _i++ ) { \ 95 if (rp[_i] > bcol) break; \ 96 if (rp[_i] == bcol) { \ 97 bap = ap + bs2*_i + bs*cidx + ridx; \ 98 if (addv == ADD_VALUES) *bap += value; \ 99 else *bap = value; \ 100 goto a_noinsert; \ 101 } \ 102 } \ 103 if (a->nonew == 1) goto a_noinsert; \ 104 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 105 if (nrow >= rmax) { \ 106 /* there is no extra room in row, therefore enlarge */ \ 107 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 108 Scalar *new_a; \ 109 \ 110 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 111 \ 112 /* malloc new storage space */ \ 113 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 114 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 115 new_j = (int *) (new_a + bs2*new_nz); \ 116 new_i = new_j + new_nz; \ 117 \ 118 /* copy over old data into new slots */ \ 119 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 120 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 121 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 122 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 123 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 124 len*sizeof(int));CHKERRQ(ierr); \ 125 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 126 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 127 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 128 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 129 /* free up old matrix storage */ \ 130 PetscFree(a->a); \ 131 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = 1; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for ( ii=N; ii>=_i; ii-- ) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 155 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 \ 158 brow = row/bs; \ 159 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 160 rmax = bimax[brow]; nrow = bilen[brow]; \ 161 bcol = col/bs; \ 162 ridx = row % bs; cidx = col % bs; \ 163 low = 0; high = nrow; \ 164 while (high-low > 3) { \ 165 t = (low+high)/2; \ 166 if (rp[t] > bcol) high = t; \ 167 else low = t; \ 168 } \ 169 for ( _i=low; _i<high; _i++ ) { \ 170 if (rp[_i] > bcol) break; \ 171 if (rp[_i] == bcol) { \ 172 bap = ap + bs2*_i + bs*cidx + ridx; \ 173 if (addv == ADD_VALUES) *bap += value; \ 174 else *bap = value; \ 175 goto b_noinsert; \ 176 } \ 177 } \ 178 if (b->nonew == 1) goto b_noinsert; \ 179 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 180 if (nrow >= rmax) { \ 181 /* there is no extra room in row, therefore enlarge */ \ 182 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 183 Scalar *new_a; \ 184 \ 185 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 186 \ 187 /* malloc new storage space */ \ 188 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 189 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 190 new_j = (int *) (new_a + bs2*new_nz); \ 191 new_i = new_j + new_nz; \ 192 \ 193 /* copy over old data into new slots */ \ 194 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 195 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 196 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 197 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 198 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 199 len*sizeof(int));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 201 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 202 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 203 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 204 /* free up old matrix storage */ \ 205 PetscFree(b->a); \ 206 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 207 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 208 b->singlemalloc = 1; \ 209 \ 210 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 211 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 212 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 213 b->maxnz += bs2*CHUNKSIZE; \ 214 b->reallocs++; \ 215 b->nz++; \ 216 } \ 217 N = nrow++ - 1; \ 218 /* shift up all the later entries in this row */ \ 219 for ( ii=N; ii>=_i; ii-- ) { \ 220 rp[ii+1] = rp[ii]; \ 221 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 222 } \ 223 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 224 rp[_i] = bcol; \ 225 ap[bs2*_i + bs*cidx + ridx] = value; \ 226 b_noinsert:; \ 227 bilen[brow] = nrow; \ 228 } 229 230 #undef __FUNC__ 231 #define __FUNC__ "MatSetValues_MPIBAIJ" 232 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 233 { 234 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 235 Scalar value; 236 int ierr,i,j,row,col; 237 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 238 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 239 int cend_orig=baij->cend_bs,bs=baij->bs; 240 241 /* Some Variables required in the macro */ 242 Mat A = baij->A; 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 244 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 245 Scalar *aa=a->a; 246 247 Mat B = baij->B; 248 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 249 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 250 Scalar *ba=b->a; 251 252 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 253 int low,high,t,ridx,cidx,bs2=a->bs2; 254 Scalar *ap,*bap; 255 256 PetscFunctionBegin; 257 for ( i=0; i<m; i++ ) { 258 if (im[i] < 0) continue; 259 #if defined(PETSC_USE_BOPT_g) 260 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 261 #endif 262 if (im[i] >= rstart_orig && im[i] < rend_orig) { 263 row = im[i] - rstart_orig; 264 for ( j=0; j<n; j++ ) { 265 if (in[j] >= cstart_orig && in[j] < cend_orig){ 266 col = in[j] - cstart_orig; 267 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 268 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 269 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 270 } else if (in[j] < 0) continue; 271 #if defined(PETSC_USE_BOPT_g) 272 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 273 #endif 274 else { 275 if (mat->was_assembled) { 276 if (!baij->colmap) { 277 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 278 } 279 #if defined (PETSC_USE_CTABLE) 280 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr); 281 col = col - 1 + in[j]%bs; 282 #else 283 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 284 #endif 285 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 286 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 287 col = in[j]; 288 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 289 B = baij->B; 290 b = (Mat_SeqBAIJ *) (B)->data; 291 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 292 ba=b->a; 293 } 294 } else col = in[j]; 295 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 296 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 297 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 298 } 299 } 300 } else { 301 if ( !baij->donotstash) { 302 if (roworiented) { 303 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 304 } else { 305 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 306 } 307 } 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 315 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 316 { 317 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 318 Scalar *value,*barray=baij->barray; 319 int ierr,i,j,ii,jj,row,col; 320 int roworiented = baij->roworiented,rstart=baij->rstart ; 321 int rend=baij->rend,cstart=baij->cstart,stepval; 322 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 323 324 if(!barray) { 325 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 326 } 327 328 if (roworiented) { 329 stepval = (n-1)*bs; 330 } else { 331 stepval = (m-1)*bs; 332 } 333 for ( i=0; i<m; i++ ) { 334 if (im[i] < 0) continue; 335 #if defined(PETSC_USE_BOPT_g) 336 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 337 #endif 338 if (im[i] >= rstart && im[i] < rend) { 339 row = im[i] - rstart; 340 for ( j=0; j<n; j++ ) { 341 /* If NumCol = 1 then a copy is not required */ 342 if ((roworiented) && (n == 1)) { 343 barray = v + i*bs2; 344 } else if((!roworiented) && (m == 1)) { 345 barray = v + j*bs2; 346 } else { /* Here a copy is required */ 347 if (roworiented) { 348 value = v + i*(stepval+bs)*bs + j*bs; 349 } else { 350 value = v + j*(stepval+bs)*bs + i*bs; 351 } 352 for ( ii=0; ii<bs; ii++,value+=stepval ) { 353 for (jj=0; jj<bs; jj++ ) { 354 *barray++ = *value++; 355 } 356 } 357 barray -=bs2; 358 } 359 360 if (in[j] >= cstart && in[j] < cend){ 361 col = in[j] - cstart; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 else if (in[j] < 0) continue; 365 #if defined(PETSC_USE_BOPT_g) 366 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 367 #endif 368 else { 369 if (mat->was_assembled) { 370 if (!baij->colmap) { 371 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 372 } 373 374 #if defined(PETSC_USE_BOPT_g) 375 #if defined (PETSC_USE_CTABLE) 376 { int data; 377 ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 378 if((data - 1) % bs) 379 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 380 } 381 #else 382 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 383 #endif 384 #endif 385 #if defined (PETSC_USE_CTABLE) 386 ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 387 col = (col - 1)/bs; 388 #else 389 col = (baij->colmap[in[j]] - 1)/bs; 390 #endif 391 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 392 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 393 col = in[j]; 394 } 395 } 396 else col = in[j]; 397 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 398 } 399 } 400 } else { 401 if (!baij->donotstash) { 402 if (roworiented) { 403 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 404 } else { 405 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 406 } 407 } 408 } 409 } 410 PetscFunctionReturn(0); 411 } 412 #define HASH_KEY 0.6180339887 413 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 414 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 415 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 416 #undef __FUNC__ 417 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 418 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 419 { 420 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 421 int ierr,i,j,row,col; 422 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 423 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 424 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 425 double tmp; 426 Scalar ** HD = baij->hd,value; 427 #if defined(PETSC_USE_BOPT_g) 428 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 429 #endif 430 431 PetscFunctionBegin; 432 433 for ( i=0; i<m; i++ ) { 434 #if defined(PETSC_USE_BOPT_g) 435 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 436 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 437 #endif 438 row = im[i]; 439 if (row >= rstart_orig && row < rend_orig) { 440 for ( j=0; j<n; j++ ) { 441 col = in[j]; 442 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 443 /* Look up into the Hash Table */ 444 key = (row/bs)*Nbs+(col/bs)+1; 445 h1 = HASH(size,key,tmp); 446 447 448 idx = h1; 449 #if defined(PETSC_USE_BOPT_g) 450 insert_ct++; 451 total_ct++; 452 if (HT[idx] != key) { 453 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 454 if (idx == size) { 455 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 456 if (idx == h1) { 457 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 458 } 459 } 460 } 461 #else 462 if (HT[idx] != key) { 463 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 464 if (idx == size) { 465 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 466 if (idx == h1) { 467 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 468 } 469 } 470 } 471 #endif 472 /* A HASH table entry is found, so insert the values at the correct address */ 473 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 474 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 475 } 476 } else { 477 if (!baij->donotstash) { 478 if (roworiented) { 479 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 480 } else { 481 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 482 } 483 } 484 } 485 } 486 #if defined(PETSC_USE_BOPT_g) 487 baij->ht_total_ct = total_ct; 488 baij->ht_insert_ct = insert_ct; 489 #endif 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNC__ 494 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 495 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 496 { 497 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 498 int ierr,i,j,ii,jj,row,col; 499 int roworiented = baij->roworiented,rstart=baij->rstart ; 500 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 501 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 502 double tmp; 503 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 504 #if defined(PETSC_USE_BOPT_g) 505 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 506 #endif 507 508 PetscFunctionBegin; 509 510 if (roworiented) { 511 stepval = (n-1)*bs; 512 } else { 513 stepval = (m-1)*bs; 514 } 515 for ( i=0; i<m; i++ ) { 516 #if defined(PETSC_USE_BOPT_g) 517 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 518 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 519 #endif 520 row = im[i]; 521 v_t = v + i*bs2; 522 if (row >= rstart && row < rend) { 523 for ( j=0; j<n; j++ ) { 524 col = in[j]; 525 526 /* Look up into the Hash Table */ 527 key = row*Nbs+col+1; 528 h1 = HASH(size,key,tmp); 529 530 idx = h1; 531 #if defined(PETSC_USE_BOPT_g) 532 total_ct++; 533 insert_ct++; 534 if (HT[idx] != key) { 535 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 536 if (idx == size) { 537 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 538 if (idx == h1) { 539 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 540 } 541 } 542 } 543 #else 544 if (HT[idx] != key) { 545 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 546 if (idx == size) { 547 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 548 if (idx == h1) { 549 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 550 } 551 } 552 } 553 #endif 554 baij_a = HD[idx]; 555 if (roworiented) { 556 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 557 /* value = v + (i*(stepval+bs)+j)*bs; */ 558 value = v_t; 559 v_t += bs; 560 if (addv == ADD_VALUES) { 561 for ( ii=0; ii<bs; ii++,value+=stepval) { 562 for ( jj=ii; jj<bs2; jj+=bs ) { 563 baij_a[jj] += *value++; 564 } 565 } 566 } else { 567 for ( ii=0; ii<bs; ii++,value+=stepval) { 568 for ( jj=ii; jj<bs2; jj+=bs ) { 569 baij_a[jj] = *value++; 570 } 571 } 572 } 573 } else { 574 value = v + j*(stepval+bs)*bs + i*bs; 575 if (addv == ADD_VALUES) { 576 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 577 for ( jj=0; jj<bs; jj++ ) { 578 baij_a[jj] += *value++; 579 } 580 } 581 } else { 582 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 583 for ( jj=0; jj<bs; jj++ ) { 584 baij_a[jj] = *value++; 585 } 586 } 587 } 588 } 589 } 590 } else { 591 if (!baij->donotstash) { 592 if (roworiented) { 593 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 594 } else { 595 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 596 } 597 } 598 } 599 } 600 #if defined(PETSC_USE_BOPT_g) 601 baij->ht_total_ct = total_ct; 602 baij->ht_insert_ct = insert_ct; 603 #endif 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNC__ 608 #define __FUNC__ "MatGetValues_MPIBAIJ" 609 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 612 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 613 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 614 615 PetscFunctionBegin; 616 for ( i=0; i<m; i++ ) { 617 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 618 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 619 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 620 row = idxm[i] - bsrstart; 621 for ( j=0; j<n; j++ ) { 622 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 623 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 624 if (idxn[j] >= bscstart && idxn[j] < bscend){ 625 col = idxn[j] - bscstart; 626 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 627 } else { 628 if (!baij->colmap) { 629 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 630 } 631 #if defined (PETSC_USE_CTABLE) 632 ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 633 data --; 634 #else 635 data = baij->colmap[idxn[j]/bs]-1; 636 #endif 637 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 638 else { 639 col = data + idxn[j]%bs; 640 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 641 } 642 } 643 } 644 } else { 645 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 646 } 647 } 648 PetscFunctionReturn(0); 649 } 650 651 #undef __FUNC__ 652 #define __FUNC__ "MatNorm_MPIBAIJ" 653 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 654 { 655 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 656 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 657 int ierr, i,bs2=baij->bs2; 658 double sum = 0.0; 659 Scalar *v; 660 661 PetscFunctionBegin; 662 if (baij->size == 1) { 663 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 664 } else { 665 if (type == NORM_FROBENIUS) { 666 v = amat->a; 667 for (i=0; i<amat->nz*bs2; i++ ) { 668 #if defined(PETSC_USE_COMPLEX) 669 sum += PetscReal(PetscConj(*v)*(*v)); v++; 670 #else 671 sum += (*v)*(*v); v++; 672 #endif 673 } 674 v = bmat->a; 675 for (i=0; i<bmat->nz*bs2; i++ ) { 676 #if defined(PETSC_USE_COMPLEX) 677 sum += PetscReal(PetscConj(*v)*(*v)); v++; 678 #else 679 sum += (*v)*(*v); v++; 680 #endif 681 } 682 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 683 *norm = sqrt(*norm); 684 } else { 685 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 686 } 687 } 688 PetscFunctionReturn(0); 689 } 690 691 692 /* 693 Creates the hash table, and sets the table 694 This table is created only once. 695 If new entried need to be added to the matrix 696 then the hash table has to be destroyed and 697 recreated. 698 */ 699 #undef __FUNC__ 700 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 701 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 702 { 703 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 704 Mat A = baij->A, B=baij->B; 705 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 706 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 707 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 708 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 709 int *HT,key; 710 Scalar **HD; 711 double tmp; 712 #if defined(PETSC_USE_BOPT_g) 713 int ct=0,max=0; 714 #endif 715 716 PetscFunctionBegin; 717 baij->ht_size=(int)(factor*nz); 718 size = baij->ht_size; 719 720 if (baij->ht) { 721 PetscFunctionReturn(0); 722 } 723 724 /* Allocate Memory for Hash Table */ 725 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 726 baij->ht = (int*)(baij->hd + size); 727 HD = baij->hd; 728 HT = baij->ht; 729 730 731 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 732 733 734 /* Loop Over A */ 735 for ( i=0; i<a->mbs; i++ ) { 736 for ( j=ai[i]; j<ai[i+1]; j++ ) { 737 row = i+rstart; 738 col = aj[j]+cstart; 739 740 key = row*Nbs + col + 1; 741 h1 = HASH(size,key,tmp); 742 for ( k=0; k<size; k++ ){ 743 if (HT[(h1+k)%size] == 0.0) { 744 HT[(h1+k)%size] = key; 745 HD[(h1+k)%size] = a->a + j*bs2; 746 break; 747 #if defined(PETSC_USE_BOPT_g) 748 } else { 749 ct++; 750 #endif 751 } 752 } 753 #if defined(PETSC_USE_BOPT_g) 754 if (k> max) max = k; 755 #endif 756 } 757 } 758 /* Loop Over B */ 759 for ( i=0; i<b->mbs; i++ ) { 760 for ( j=bi[i]; j<bi[i+1]; j++ ) { 761 row = i+rstart; 762 col = garray[bj[j]]; 763 key = row*Nbs + col + 1; 764 h1 = HASH(size,key,tmp); 765 for ( k=0; k<size; k++ ){ 766 if (HT[(h1+k)%size] == 0.0) { 767 HT[(h1+k)%size] = key; 768 HD[(h1+k)%size] = b->a + j*bs2; 769 break; 770 #if defined(PETSC_USE_BOPT_g) 771 } else { 772 ct++; 773 #endif 774 } 775 } 776 #if defined(PETSC_USE_BOPT_g) 777 if (k> max) max = k; 778 #endif 779 } 780 } 781 782 /* Print Summary */ 783 #if defined(PETSC_USE_BOPT_g) 784 for ( i=0,j=0; i<size; i++) 785 if (HT[i]) {j++;} 786 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 787 (j== 0)? 0.0:((double)(ct+j))/j,max); 788 #endif 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNC__ 793 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 794 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 795 { 796 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 797 int ierr,nstash,reallocs; 798 InsertMode addv; 799 800 PetscFunctionBegin; 801 if (baij->donotstash) { 802 PetscFunctionReturn(0); 803 } 804 805 /* make sure all processors are either in INSERTMODE or ADDMODE */ 806 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 807 if (addv == (ADD_VALUES|INSERT_VALUES)) { 808 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 809 } 810 mat->insertmode = addv; /* in case this processor had no cache */ 811 812 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 813 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 815 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 816 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 817 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNC__ 822 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 823 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 826 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 827 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 828 int *row,*col,other_disassembled,r1,r2,r3; 829 Scalar *val; 830 InsertMode addv = mat->insertmode; 831 832 PetscFunctionBegin; 833 if (!baij->donotstash) { 834 while (1) { 835 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 836 if (!flg) break; 837 838 for ( i=0; i<n; ) { 839 /* Now identify the consecutive vals belonging to the same row */ 840 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 841 if (j < n) ncols = j-i; 842 else ncols = n-i; 843 /* Now assemble all these values with a single function call */ 844 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 845 i = j; 846 } 847 } 848 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 849 /* Now process the block-stash. Since the values are stashed column-oriented, 850 set the roworiented flag to column oriented, and after MatSetValues() 851 restore the original flags */ 852 r1 = baij->roworiented; 853 r2 = a->roworiented; 854 r3 = b->roworiented; 855 baij->roworiented = 0; 856 a->roworiented = 0; 857 b->roworiented = 0; 858 while (1) { 859 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 860 if (!flg) break; 861 862 for ( i=0; i<n; ) { 863 /* Now identify the consecutive vals belonging to the same row */ 864 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 865 if (j < n) ncols = j-i; 866 else ncols = n-i; 867 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 868 i = j; 869 } 870 } 871 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 872 baij->roworiented = r1; 873 a->roworiented = r2; 874 b->roworiented = r3; 875 } 876 877 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 878 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 879 880 /* determine if any processor has disassembled, if so we must 881 also disassemble ourselfs, in order that we may reassemble. */ 882 /* 883 if nonzero structure of submatrix B cannot change then we know that 884 no processor disassembled thus we can skip this stuff 885 */ 886 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 887 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 888 if (mat->was_assembled && !other_disassembled) { 889 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 890 } 891 } 892 893 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 894 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 898 899 #if defined(PETSC_USE_BOPT_g) 900 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 901 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 902 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 903 baij->ht_total_ct = 0; 904 baij->ht_insert_ct = 0; 905 } 906 #endif 907 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 908 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 909 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 910 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 911 } 912 913 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 914 PetscFunctionReturn(0); 915 } 916 917 /* 918 #undef __FUNC__ 919 #define __FUNC__ "MatView_MPIBAIJ_Binary" 920 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 921 { 922 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 923 int ierr; 924 925 PetscFunctionBegin; 926 if (baij->size == 1) { 927 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 928 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 929 PetscFunctionReturn(0); 930 } 931 */ 932 933 #undef __FUNC__ 934 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 935 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 936 { 937 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 938 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 939 FILE *fd; 940 ViewerType vtype; 941 942 PetscFunctionBegin; 943 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 944 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 945 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 946 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 947 MatInfo info; 948 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 949 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 950 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 951 ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr); 952 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 953 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 954 baij->bs,(int)info.memory); 955 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 956 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 957 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 958 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 959 fflush(fd); 960 ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr); 961 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 964 ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 } 968 969 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 970 Draw draw; 971 PetscTruth isnull; 972 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 973 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 974 } 975 976 if (size == 1) { 977 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 978 } else { 979 /* assemble the entire matrix onto first processor. */ 980 Mat A; 981 Mat_SeqBAIJ *Aloc; 982 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 983 int mbs = baij->mbs; 984 Scalar *a; 985 986 if (!rank) { 987 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 988 } else { 989 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 990 } 991 PLogObjectParent(mat,A); 992 993 /* copy over the A part */ 994 Aloc = (Mat_SeqBAIJ*) baij->A->data; 995 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 996 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 997 998 for ( i=0; i<mbs; i++ ) { 999 rvals[0] = bs*(baij->rstart + i); 1000 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1001 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1002 col = (baij->cstart+aj[j])*bs; 1003 for (k=0; k<bs; k++ ) { 1004 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1005 col++; a += bs; 1006 } 1007 } 1008 } 1009 /* copy over the B part */ 1010 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1011 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1012 for ( i=0; i<mbs; i++ ) { 1013 rvals[0] = bs*(baij->rstart + i); 1014 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1015 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1016 col = baij->garray[aj[j]]*bs; 1017 for (k=0; k<bs; k++ ) { 1018 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1019 col++; a += bs; 1020 } 1021 } 1022 } 1023 PetscFree(rvals); 1024 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1025 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1026 /* 1027 Everyone has to call to draw the matrix since the graphics waits are 1028 synchronized across all processors that share the Draw object 1029 */ 1030 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1031 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1032 } 1033 ierr = MatDestroy(A);CHKERRQ(ierr); 1034 } 1035 PetscFunctionReturn(0); 1036 } 1037 1038 1039 1040 #undef __FUNC__ 1041 #define __FUNC__ "MatView_MPIBAIJ" 1042 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1043 { 1044 int ierr; 1045 ViewerType vtype; 1046 1047 PetscFunctionBegin; 1048 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1049 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1050 PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) { 1051 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1052 /* 1053 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1054 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1055 */ 1056 } else { 1057 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1058 } 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNC__ 1063 #define __FUNC__ "MatDestroy_MPIBAIJ" 1064 int MatDestroy_MPIBAIJ(Mat mat) 1065 { 1066 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1067 int ierr; 1068 1069 PetscFunctionBegin; 1070 if (--mat->refct > 0) PetscFunctionReturn(0); 1071 1072 if (mat->mapping) { 1073 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1074 } 1075 if (mat->bmapping) { 1076 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1077 } 1078 if (mat->rmap) { 1079 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1080 } 1081 if (mat->cmap) { 1082 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1083 } 1084 #if defined(PETSC_USE_LOG) 1085 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1086 #endif 1087 1088 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1089 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1090 1091 PetscFree(baij->rowners); 1092 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1093 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1094 #if defined (PETSC_USE_CTABLE) 1095 if (baij->colmap) TableDelete(baij->colmap); 1096 #else 1097 if (baij->colmap) PetscFree(baij->colmap); 1098 #endif 1099 if (baij->garray) PetscFree(baij->garray); 1100 if (baij->lvec) VecDestroy(baij->lvec); 1101 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1102 if (baij->rowvalues) PetscFree(baij->rowvalues); 1103 if (baij->barray) PetscFree(baij->barray); 1104 if (baij->hd) PetscFree(baij->hd); 1105 PetscFree(baij); 1106 PLogObjectDestroy(mat); 1107 PetscHeaderDestroy(mat); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNC__ 1112 #define __FUNC__ "MatMult_MPIBAIJ" 1113 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1114 { 1115 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1116 int ierr, nt; 1117 1118 PetscFunctionBegin; 1119 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1120 if (nt != a->n) { 1121 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1122 } 1123 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1124 if (nt != a->m) { 1125 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1126 } 1127 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1128 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1129 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1130 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1131 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNC__ 1136 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1137 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1138 { 1139 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1140 int ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1144 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1145 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1146 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNC__ 1151 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1152 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1153 { 1154 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1155 int ierr; 1156 1157 PetscFunctionBegin; 1158 /* do nondiagonal part */ 1159 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1160 /* send it on its way */ 1161 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1162 /* do local part */ 1163 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1164 /* receive remote parts: note this assumes the values are not actually */ 1165 /* inserted in yy until the next line, which is true for my implementation*/ 1166 /* but is not perhaps always true. */ 1167 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNC__ 1172 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1173 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1174 { 1175 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1176 int ierr; 1177 1178 PetscFunctionBegin; 1179 /* do nondiagonal part */ 1180 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1181 /* send it on its way */ 1182 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1183 /* do local part */ 1184 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1185 /* receive remote parts: note this assumes the values are not actually */ 1186 /* inserted in yy until the next line, which is true for my implementation*/ 1187 /* but is not perhaps always true. */ 1188 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 /* 1193 This only works correctly for square matrices where the subblock A->A is the 1194 diagonal block 1195 */ 1196 #undef __FUNC__ 1197 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1198 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1199 { 1200 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1201 int ierr; 1202 1203 PetscFunctionBegin; 1204 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1205 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNC__ 1210 #define __FUNC__ "MatScale_MPIBAIJ" 1211 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1212 { 1213 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1214 int ierr; 1215 1216 PetscFunctionBegin; 1217 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1218 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatGetSize_MPIBAIJ" 1224 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1225 { 1226 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1227 1228 PetscFunctionBegin; 1229 if (m) *m = mat->M; 1230 if (n) *n = mat->N; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNC__ 1235 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1236 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1237 { 1238 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1239 1240 PetscFunctionBegin; 1241 *m = mat->m; *n = mat->n; 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNC__ 1246 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1247 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1248 { 1249 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1250 1251 PetscFunctionBegin; 1252 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNC__ 1257 #define __FUNC__ "MatGetRow_MPIBAIJ" 1258 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1259 { 1260 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1261 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1262 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1263 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1264 int *cmap, *idx_p,cstart = mat->cstart; 1265 1266 PetscFunctionBegin; 1267 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1268 mat->getrowactive = PETSC_TRUE; 1269 1270 if (!mat->rowvalues && (idx || v)) { 1271 /* 1272 allocate enough space to hold information from the longest row. 1273 */ 1274 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1275 int max = 1,mbs = mat->mbs,tmp; 1276 for ( i=0; i<mbs; i++ ) { 1277 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1278 if (max < tmp) { max = tmp; } 1279 } 1280 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1281 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1282 } 1283 1284 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1285 lrow = row - brstart; 1286 1287 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1288 if (!v) {pvA = 0; pvB = 0;} 1289 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1290 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1291 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1292 nztot = nzA + nzB; 1293 1294 cmap = mat->garray; 1295 if (v || idx) { 1296 if (nztot) { 1297 /* Sort by increasing column numbers, assuming A and B already sorted */ 1298 int imark = -1; 1299 if (v) { 1300 *v = v_p = mat->rowvalues; 1301 for ( i=0; i<nzB; i++ ) { 1302 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1303 else break; 1304 } 1305 imark = i; 1306 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1307 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1308 } 1309 if (idx) { 1310 *idx = idx_p = mat->rowindices; 1311 if (imark > -1) { 1312 for ( i=0; i<imark; i++ ) { 1313 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1314 } 1315 } else { 1316 for ( i=0; i<nzB; i++ ) { 1317 if (cmap[cworkB[i]/bs] < cstart) 1318 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1319 else break; 1320 } 1321 imark = i; 1322 } 1323 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1324 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1325 } 1326 } else { 1327 if (idx) *idx = 0; 1328 if (v) *v = 0; 1329 } 1330 } 1331 *nz = nztot; 1332 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1333 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNC__ 1338 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1339 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1340 { 1341 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1342 1343 PetscFunctionBegin; 1344 if (baij->getrowactive == PETSC_FALSE) { 1345 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1346 } 1347 baij->getrowactive = PETSC_FALSE; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNC__ 1352 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1353 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1354 { 1355 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1356 1357 PetscFunctionBegin; 1358 *bs = baij->bs; 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNC__ 1363 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1364 int MatZeroEntries_MPIBAIJ(Mat A) 1365 { 1366 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1367 int ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1371 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNC__ 1376 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1377 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1378 { 1379 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1380 Mat A = a->A, B = a->B; 1381 int ierr; 1382 double isend[5], irecv[5]; 1383 1384 PetscFunctionBegin; 1385 info->block_size = (double)a->bs; 1386 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1387 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1388 isend[3] = info->memory; isend[4] = info->mallocs; 1389 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1390 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1391 isend[3] += info->memory; isend[4] += info->mallocs; 1392 if (flag == MAT_LOCAL) { 1393 info->nz_used = isend[0]; 1394 info->nz_allocated = isend[1]; 1395 info->nz_unneeded = isend[2]; 1396 info->memory = isend[3]; 1397 info->mallocs = isend[4]; 1398 } else if (flag == MAT_GLOBAL_MAX) { 1399 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1400 info->nz_used = irecv[0]; 1401 info->nz_allocated = irecv[1]; 1402 info->nz_unneeded = irecv[2]; 1403 info->memory = irecv[3]; 1404 info->mallocs = irecv[4]; 1405 } else if (flag == MAT_GLOBAL_SUM) { 1406 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1407 info->nz_used = irecv[0]; 1408 info->nz_allocated = irecv[1]; 1409 info->nz_unneeded = irecv[2]; 1410 info->memory = irecv[3]; 1411 info->mallocs = irecv[4]; 1412 } else { 1413 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1414 } 1415 info->rows_global = (double)a->M; 1416 info->columns_global = (double)a->N; 1417 info->rows_local = (double)a->m; 1418 info->columns_local = (double)a->N; 1419 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1420 info->fill_ratio_needed = 0; 1421 info->factor_mallocs = 0; 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNC__ 1426 #define __FUNC__ "MatSetOption_MPIBAIJ" 1427 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1428 { 1429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1430 int ierr; 1431 1432 PetscFunctionBegin; 1433 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1434 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1435 op == MAT_COLUMNS_UNSORTED || 1436 op == MAT_COLUMNS_SORTED || 1437 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1438 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1439 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1440 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1441 } else if (op == MAT_ROW_ORIENTED) { 1442 a->roworiented = 1; 1443 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1444 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1445 } else if (op == MAT_ROWS_SORTED || 1446 op == MAT_ROWS_UNSORTED || 1447 op == MAT_SYMMETRIC || 1448 op == MAT_STRUCTURALLY_SYMMETRIC || 1449 op == MAT_YES_NEW_DIAGONALS || 1450 op == MAT_USE_HASH_TABLE) { 1451 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1452 } else if (op == MAT_COLUMN_ORIENTED) { 1453 a->roworiented = 0; 1454 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1455 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1456 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1457 a->donotstash = 1; 1458 } else if (op == MAT_NO_NEW_DIAGONALS) { 1459 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1460 } else if (op == MAT_USE_HASH_TABLE) { 1461 a->ht_flag = 1; 1462 } else { 1463 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNC__ 1469 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1470 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1471 { 1472 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1473 Mat_SeqBAIJ *Aloc; 1474 Mat B; 1475 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1476 int bs=baij->bs,mbs=baij->mbs; 1477 Scalar *a; 1478 1479 PetscFunctionBegin; 1480 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1481 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1482 CHKERRQ(ierr); 1483 1484 /* copy over the A part */ 1485 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1486 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1487 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1488 1489 for ( i=0; i<mbs; i++ ) { 1490 rvals[0] = bs*(baij->rstart + i); 1491 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1492 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1493 col = (baij->cstart+aj[j])*bs; 1494 for (k=0; k<bs; k++ ) { 1495 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1496 col++; a += bs; 1497 } 1498 } 1499 } 1500 /* copy over the B part */ 1501 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1502 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1503 for ( i=0; i<mbs; i++ ) { 1504 rvals[0] = bs*(baij->rstart + i); 1505 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1506 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1507 col = baij->garray[aj[j]]*bs; 1508 for (k=0; k<bs; k++ ) { 1509 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1510 col++; a += bs; 1511 } 1512 } 1513 } 1514 PetscFree(rvals); 1515 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1516 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1517 1518 if (matout != PETSC_NULL) { 1519 *matout = B; 1520 } else { 1521 PetscOps *Abops; 1522 MatOps Aops; 1523 1524 /* This isn't really an in-place transpose .... but free data structures from baij */ 1525 PetscFree(baij->rowners); 1526 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1527 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1528 #if defined (PETSC_USE_CTABLE) 1529 if (baij->colmap) TableDelete(baij->colmap); 1530 #else 1531 if (baij->colmap) PetscFree(baij->colmap); 1532 #endif 1533 if (baij->garray) PetscFree(baij->garray); 1534 if (baij->lvec) VecDestroy(baij->lvec); 1535 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1536 PetscFree(baij); 1537 1538 /* 1539 This is horrible, horrible code. We need to keep the 1540 A pointers for the bops and ops but copy everything 1541 else from C. 1542 */ 1543 Abops = A->bops; 1544 Aops = A->ops; 1545 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1546 A->bops = Abops; 1547 A->ops = Aops; 1548 1549 PetscHeaderDestroy(B); 1550 } 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNC__ 1555 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1556 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1557 { 1558 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1559 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1560 int ierr,s1,s2,s3; 1561 1562 PetscFunctionBegin; 1563 if (ll) { 1564 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1565 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 1566 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1567 ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr); 1568 ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr); 1569 } 1570 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNC__ 1575 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1576 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1577 { 1578 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1579 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1580 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1581 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1582 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1583 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1584 MPI_Comm comm = A->comm; 1585 MPI_Request *send_waits,*recv_waits; 1586 MPI_Status recv_status,*send_status; 1587 IS istmp; 1588 1589 PetscFunctionBegin; 1590 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1591 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1592 1593 /* first count number of contributors to each processor */ 1594 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 1595 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1596 procs = nprocs + size; 1597 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1598 for ( i=0; i<N; i++ ) { 1599 idx = rows[i]; 1600 found = 0; 1601 for ( j=0; j<size; j++ ) { 1602 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1603 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1604 } 1605 } 1606 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1607 } 1608 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1609 1610 /* inform other processors of number of messages and max length*/ 1611 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 1612 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1613 nrecvs = work[rank]; 1614 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1615 nmax = work[rank]; 1616 PetscFree(work); 1617 1618 /* post receives: */ 1619 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1620 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1621 for ( i=0; i<nrecvs; i++ ) { 1622 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1623 } 1624 1625 /* do sends: 1626 1) starts[i] gives the starting index in svalues for stuff going to 1627 the ith processor 1628 */ 1629 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 1630 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1631 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 1632 starts[0] = 0; 1633 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1634 for ( i=0; i<N; i++ ) { 1635 svalues[starts[owner[i]]++] = rows[i]; 1636 } 1637 ISRestoreIndices(is,&rows); 1638 1639 starts[0] = 0; 1640 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1641 count = 0; 1642 for ( i=0; i<size; i++ ) { 1643 if (procs[i]) { 1644 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1645 } 1646 } 1647 PetscFree(starts); 1648 1649 base = owners[rank]*bs; 1650 1651 /* wait on receives */ 1652 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 1653 source = lens + nrecvs; 1654 count = nrecvs; slen = 0; 1655 while (count) { 1656 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1657 /* unpack receives into our local space */ 1658 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1659 source[imdex] = recv_status.MPI_SOURCE; 1660 lens[imdex] = n; 1661 slen += n; 1662 count--; 1663 } 1664 PetscFree(recv_waits); 1665 1666 /* move the data into the send scatter */ 1667 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 1668 count = 0; 1669 for ( i=0; i<nrecvs; i++ ) { 1670 values = rvalues + i*nmax; 1671 for ( j=0; j<lens[i]; j++ ) { 1672 lrows[count++] = values[j] - base; 1673 } 1674 } 1675 PetscFree(rvalues); PetscFree(lens); 1676 PetscFree(owner); PetscFree(nprocs); 1677 1678 /* actually zap the local rows */ 1679 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1680 PLogObjectParent(A,istmp); 1681 1682 /* 1683 Zero the required rows. If the "diagonal block" of the matrix 1684 is square and the user wishes to set the diagonal we use seperate 1685 code so that MatSetValues() is not called for each diagonal allocating 1686 new memory, thus calling lots of mallocs and slowing things down. 1687 1688 Contributed by: Mathew Knepley 1689 */ 1690 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1691 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 1692 if (diag && (l->A->M == l->A->N)) { 1693 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 1694 } else if (diag) { 1695 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1696 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1697 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1698 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1699 } 1700 for ( i = 0; i < slen; i++ ) { 1701 row = lrows[i] + rstart_bs; 1702 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1703 } 1704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1706 } else { 1707 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1708 } 1709 1710 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1711 PetscFree(lrows); 1712 1713 /* wait on sends */ 1714 if (nsends) { 1715 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1716 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1717 PetscFree(send_status); 1718 } 1719 PetscFree(send_waits); PetscFree(svalues); 1720 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNC__ 1725 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1726 int MatPrintHelp_MPIBAIJ(Mat A) 1727 { 1728 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1729 MPI_Comm comm = A->comm; 1730 static int called = 0; 1731 int ierr; 1732 1733 PetscFunctionBegin; 1734 if (!a->rank) { 1735 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1736 } 1737 if (called) {PetscFunctionReturn(0);} else called = 1; 1738 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1739 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNC__ 1744 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1745 int MatSetUnfactored_MPIBAIJ(Mat A) 1746 { 1747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1748 int ierr; 1749 1750 PetscFunctionBegin; 1751 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1756 1757 #undef __FUNC__ 1758 #define __FUNC__ "MatEqual_MPIBAIJ" 1759 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag) 1760 { 1761 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data; 1762 Mat a, b, c, d; 1763 PetscTruth flg; 1764 int ierr; 1765 1766 PetscFunctionBegin; 1767 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1768 a = matA->A; b = matA->B; 1769 c = matB->A; d = matB->B; 1770 1771 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1772 if (flg == PETSC_TRUE) { 1773 ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1774 } 1775 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 1779 /* -------------------------------------------------------------------*/ 1780 static struct _MatOps MatOps_Values = { 1781 MatSetValues_MPIBAIJ, 1782 MatGetRow_MPIBAIJ, 1783 MatRestoreRow_MPIBAIJ, 1784 MatMult_MPIBAIJ, 1785 MatMultAdd_MPIBAIJ, 1786 MatMultTrans_MPIBAIJ, 1787 MatMultTransAdd_MPIBAIJ, 1788 0, 1789 0, 1790 0, 1791 0, 1792 0, 1793 0, 1794 0, 1795 MatTranspose_MPIBAIJ, 1796 MatGetInfo_MPIBAIJ, 1797 MatEqual_MPIBAIJ, 1798 MatGetDiagonal_MPIBAIJ, 1799 MatDiagonalScale_MPIBAIJ, 1800 MatNorm_MPIBAIJ, 1801 MatAssemblyBegin_MPIBAIJ, 1802 MatAssemblyEnd_MPIBAIJ, 1803 0, 1804 MatSetOption_MPIBAIJ, 1805 MatZeroEntries_MPIBAIJ, 1806 MatZeroRows_MPIBAIJ, 1807 0, 1808 0, 1809 0, 1810 0, 1811 MatGetSize_MPIBAIJ, 1812 MatGetLocalSize_MPIBAIJ, 1813 MatGetOwnershipRange_MPIBAIJ, 1814 0, 1815 0, 1816 0, 1817 0, 1818 MatDuplicate_MPIBAIJ, 1819 0, 1820 0, 1821 0, 1822 0, 1823 0, 1824 MatGetSubMatrices_MPIBAIJ, 1825 MatIncreaseOverlap_MPIBAIJ, 1826 MatGetValues_MPIBAIJ, 1827 0, 1828 MatPrintHelp_MPIBAIJ, 1829 MatScale_MPIBAIJ, 1830 0, 1831 0, 1832 0, 1833 MatGetBlockSize_MPIBAIJ, 1834 0, 1835 0, 1836 0, 1837 0, 1838 0, 1839 0, 1840 MatSetUnfactored_MPIBAIJ, 1841 0, 1842 MatSetValuesBlocked_MPIBAIJ, 1843 0, 1844 0, 1845 0, 1846 MatGetMaps_Petsc}; 1847 1848 1849 EXTERN_C_BEGIN 1850 #undef __FUNC__ 1851 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1852 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1853 { 1854 PetscFunctionBegin; 1855 *a = ((Mat_MPIBAIJ *)A->data)->A; 1856 *iscopy = PETSC_FALSE; 1857 PetscFunctionReturn(0); 1858 } 1859 EXTERN_C_END 1860 1861 #undef __FUNC__ 1862 #define __FUNC__ "MatCreateMPIBAIJ" 1863 /*@C 1864 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1865 (block compressed row). For good matrix assembly performance 1866 the user should preallocate the matrix storage by setting the parameters 1867 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1868 performance can be increased by more than a factor of 50. 1869 1870 Collective on MPI_Comm 1871 1872 Input Parameters: 1873 + comm - MPI communicator 1874 . bs - size of blockk 1875 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1876 This value should be the same as the local size used in creating the 1877 y vector for the matrix-vector product y = Ax. 1878 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1879 This value should be the same as the local size used in creating the 1880 x vector for the matrix-vector product y = Ax. 1881 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1882 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1883 . d_nz - number of block nonzeros per block row in diagonal portion of local 1884 submatrix (same for all local rows) 1885 . d_nnz - array containing the number of block nonzeros in the various block rows 1886 of the in diagonal portion of the local (possibly different for each block 1887 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1888 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1889 submatrix (same for all local rows). 1890 - o_nnz - array containing the number of nonzeros in the various block rows of the 1891 off-diagonal portion of the local submatrix (possibly different for 1892 each block row) or PETSC_NULL. 1893 1894 Output Parameter: 1895 . A - the matrix 1896 1897 Options Database Keys: 1898 . -mat_no_unroll - uses code that does not unroll the loops in the 1899 block calculations (much slower) 1900 . -mat_block_size - size of the blocks to use 1901 . -mat_mpi - use the parallel matrix data structures even on one processor 1902 (defaults to using SeqBAIJ format on one processor) 1903 1904 Notes: 1905 The user MUST specify either the local or global matrix dimensions 1906 (possibly both). 1907 1908 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1909 than it must be used on all processors that share the object for that argument. 1910 1911 Storage Information: 1912 For a square global matrix we define each processor's diagonal portion 1913 to be its local rows and the corresponding columns (a square submatrix); 1914 each processor's off-diagonal portion encompasses the remainder of the 1915 local matrix (a rectangular submatrix). 1916 1917 The user can specify preallocated storage for the diagonal part of 1918 the local submatrix with either d_nz or d_nnz (not both). Set 1919 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1920 memory allocation. Likewise, specify preallocated storage for the 1921 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1922 1923 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1924 the figure below we depict these three local rows and all columns (0-11). 1925 1926 .vb 1927 0 1 2 3 4 5 6 7 8 9 10 11 1928 ------------------- 1929 row 3 | o o o d d d o o o o o o 1930 row 4 | o o o d d d o o o o o o 1931 row 5 | o o o d d d o o o o o o 1932 ------------------- 1933 .ve 1934 1935 Thus, any entries in the d locations are stored in the d (diagonal) 1936 submatrix, and any entries in the o locations are stored in the 1937 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1938 stored simply in the MATSEQBAIJ format for compressed row storage. 1939 1940 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1941 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1942 In general, for PDE problems in which most nonzeros are near the diagonal, 1943 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1944 or you will get TERRIBLE performance; see the users' manual chapter on 1945 matrices. 1946 1947 Level: intermediate 1948 1949 .keywords: matrix, block, aij, compressed row, sparse, parallel 1950 1951 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1952 @*/ 1953 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1954 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1955 { 1956 Mat B; 1957 Mat_MPIBAIJ *b; 1958 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1959 int flag1 = 0,flag2 = 0; 1960 1961 PetscFunctionBegin; 1962 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1963 1964 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1965 1966 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1967 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 1968 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1969 if (!flag1 && !flag2 && size == 1) { 1970 if (M == PETSC_DECIDE) M = m; 1971 if (N == PETSC_DECIDE) N = n; 1972 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 *A = 0; 1977 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1978 PLogObjectCreate(B); 1979 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 1980 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1981 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1982 1983 B->ops->destroy = MatDestroy_MPIBAIJ; 1984 B->ops->view = MatView_MPIBAIJ; 1985 B->mapping = 0; 1986 B->factor = 0; 1987 B->assembled = PETSC_FALSE; 1988 1989 B->insertmode = NOT_SET_VALUES; 1990 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1991 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 1992 1993 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1994 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1995 } 1996 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 1997 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1998 } 1999 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2000 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2001 } 2002 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2003 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2004 2005 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2006 work[0] = m; work[1] = n; 2007 mbs = m/bs; nbs = n/bs; 2008 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2009 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2010 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2011 } 2012 if (m == PETSC_DECIDE) { 2013 Mbs = M/bs; 2014 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2015 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2016 m = mbs*bs; 2017 } 2018 if (n == PETSC_DECIDE) { 2019 Nbs = N/bs; 2020 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2021 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2022 n = nbs*bs; 2023 } 2024 if (mbs*bs != m || nbs*bs != n) { 2025 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2026 } 2027 2028 b->m = m; B->m = m; 2029 b->n = n; B->n = n; 2030 b->N = N; B->N = N; 2031 b->M = M; B->M = M; 2032 b->bs = bs; 2033 b->bs2 = bs*bs; 2034 b->mbs = mbs; 2035 b->nbs = nbs; 2036 b->Mbs = Mbs; 2037 b->Nbs = Nbs; 2038 2039 /* the information in the maps duplicates the information computed below, eventually 2040 we should remove the duplicate information that is not contained in the maps */ 2041 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2042 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2043 2044 /* build local table of row and column ownerships */ 2045 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2046 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2047 b->cowners = b->rowners + b->size + 2; 2048 b->rowners_bs = b->cowners + b->size + 2; 2049 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2050 b->rowners[0] = 0; 2051 for ( i=2; i<=b->size; i++ ) { 2052 b->rowners[i] += b->rowners[i-1]; 2053 } 2054 for ( i=0; i<=b->size; i++ ) { 2055 b->rowners_bs[i] = b->rowners[i]*bs; 2056 } 2057 b->rstart = b->rowners[b->rank]; 2058 b->rend = b->rowners[b->rank+1]; 2059 b->rstart_bs = b->rstart * bs; 2060 b->rend_bs = b->rend * bs; 2061 2062 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2063 b->cowners[0] = 0; 2064 for ( i=2; i<=b->size; i++ ) { 2065 b->cowners[i] += b->cowners[i-1]; 2066 } 2067 b->cstart = b->cowners[b->rank]; 2068 b->cend = b->cowners[b->rank+1]; 2069 b->cstart_bs = b->cstart * bs; 2070 b->cend_bs = b->cend * bs; 2071 2072 2073 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2074 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2075 PLogObjectParent(B,b->A); 2076 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2077 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2078 PLogObjectParent(B,b->B); 2079 2080 /* build cache for off array entries formed */ 2081 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2082 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2083 b->donotstash = 0; 2084 b->colmap = 0; 2085 b->garray = 0; 2086 b->roworiented = 1; 2087 2088 /* stuff used in block assembly */ 2089 b->barray = 0; 2090 2091 /* stuff used for matrix vector multiply */ 2092 b->lvec = 0; 2093 b->Mvctx = 0; 2094 2095 /* stuff for MatGetRow() */ 2096 b->rowindices = 0; 2097 b->rowvalues = 0; 2098 b->getrowactive = PETSC_FALSE; 2099 2100 /* hash table stuff */ 2101 b->ht = 0; 2102 b->hd = 0; 2103 b->ht_size = 0; 2104 b->ht_flag = 0; 2105 b->ht_fact = 0; 2106 b->ht_total_ct = 0; 2107 b->ht_insert_ct = 0; 2108 2109 *A = B; 2110 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2111 if (flg) { 2112 double fact = 1.39; 2113 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2114 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2115 if (fact <= 1.0) fact = 1.39; 2116 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2117 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2118 } 2119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2120 "MatStoreValues_MPIBAIJ", 2121 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2123 "MatRetrieveValues_MPIBAIJ", 2124 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2125 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2126 "MatGetDiagonalBlock_MPIBAIJ", 2127 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNC__ 2132 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2133 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2134 { 2135 Mat mat; 2136 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2137 int ierr, len=0, flg; 2138 2139 PetscFunctionBegin; 2140 *newmat = 0; 2141 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2142 PLogObjectCreate(mat); 2143 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2144 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2145 mat->ops->destroy = MatDestroy_MPIBAIJ; 2146 mat->ops->view = MatView_MPIBAIJ; 2147 mat->factor = matin->factor; 2148 mat->assembled = PETSC_TRUE; 2149 mat->insertmode = NOT_SET_VALUES; 2150 2151 a->m = mat->m = oldmat->m; 2152 a->n = mat->n = oldmat->n; 2153 a->M = mat->M = oldmat->M; 2154 a->N = mat->N = oldmat->N; 2155 2156 a->bs = oldmat->bs; 2157 a->bs2 = oldmat->bs2; 2158 a->mbs = oldmat->mbs; 2159 a->nbs = oldmat->nbs; 2160 a->Mbs = oldmat->Mbs; 2161 a->Nbs = oldmat->Nbs; 2162 2163 a->rstart = oldmat->rstart; 2164 a->rend = oldmat->rend; 2165 a->cstart = oldmat->cstart; 2166 a->cend = oldmat->cend; 2167 a->size = oldmat->size; 2168 a->rank = oldmat->rank; 2169 a->donotstash = oldmat->donotstash; 2170 a->roworiented = oldmat->roworiented; 2171 a->rowindices = 0; 2172 a->rowvalues = 0; 2173 a->getrowactive = PETSC_FALSE; 2174 a->barray = 0; 2175 a->rstart_bs = oldmat->rstart_bs; 2176 a->rend_bs = oldmat->rend_bs; 2177 a->cstart_bs = oldmat->cstart_bs; 2178 a->cend_bs = oldmat->cend_bs; 2179 2180 /* hash table stuff */ 2181 a->ht = 0; 2182 a->hd = 0; 2183 a->ht_size = 0; 2184 a->ht_flag = oldmat->ht_flag; 2185 a->ht_fact = oldmat->ht_fact; 2186 a->ht_total_ct = 0; 2187 a->ht_insert_ct = 0; 2188 2189 2190 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2191 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2192 a->cowners = a->rowners + a->size + 2; 2193 a->rowners_bs = a->cowners + a->size + 2; 2194 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2195 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2196 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2197 if (oldmat->colmap) { 2198 #if defined (PETSC_USE_CTABLE) 2199 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2200 #else 2201 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2202 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2203 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2204 #endif 2205 } else a->colmap = 0; 2206 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2207 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2208 PLogObjectMemory(mat,len*sizeof(int)); 2209 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2210 } else a->garray = 0; 2211 2212 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2213 PLogObjectParent(mat,a->lvec); 2214 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2215 2216 PLogObjectParent(mat,a->Mvctx); 2217 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2218 PLogObjectParent(mat,a->A); 2219 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2220 PLogObjectParent(mat,a->B); 2221 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2222 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2223 if (flg) { 2224 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2225 } 2226 *newmat = mat; 2227 PetscFunctionReturn(0); 2228 } 2229 2230 #include "sys.h" 2231 2232 #undef __FUNC__ 2233 #define __FUNC__ "MatLoad_MPIBAIJ" 2234 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2235 { 2236 Mat A; 2237 int i, nz, ierr, j,rstart, rend, fd; 2238 Scalar *vals,*buf; 2239 MPI_Comm comm = ((PetscObject)viewer)->comm; 2240 MPI_Status status; 2241 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2242 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2243 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2244 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2245 int dcount,kmax,k,nzcount,tmp; 2246 2247 PetscFunctionBegin; 2248 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2249 2250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2251 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2252 if (!rank) { 2253 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2254 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2255 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2256 if (header[3] < 0) { 2257 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2258 } 2259 } 2260 2261 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2262 M = header[1]; N = header[2]; 2263 2264 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2265 2266 /* 2267 This code adds extra rows to make sure the number of rows is 2268 divisible by the blocksize 2269 */ 2270 Mbs = M/bs; 2271 extra_rows = bs - M + bs*(Mbs); 2272 if (extra_rows == bs) extra_rows = 0; 2273 else Mbs++; 2274 if (extra_rows &&!rank) { 2275 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2276 } 2277 2278 /* determine ownership of all rows */ 2279 mbs = Mbs/size + ((Mbs % size) > rank); 2280 m = mbs * bs; 2281 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2282 browners = rowners + size + 1; 2283 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2284 rowners[0] = 0; 2285 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2286 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2287 rstart = rowners[rank]; 2288 rend = rowners[rank+1]; 2289 2290 /* distribute row lengths to all processors */ 2291 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2292 if (!rank) { 2293 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 2294 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2295 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2296 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2297 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2298 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2299 PetscFree(sndcounts); 2300 } else { 2301 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2302 } 2303 2304 if (!rank) { 2305 /* calculate the number of nonzeros on each processor */ 2306 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2307 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2308 for ( i=0; i<size; i++ ) { 2309 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2310 procsnz[i] += rowlengths[j]; 2311 } 2312 } 2313 PetscFree(rowlengths); 2314 2315 /* determine max buffer needed and allocate it */ 2316 maxnz = 0; 2317 for ( i=0; i<size; i++ ) { 2318 maxnz = PetscMax(maxnz,procsnz[i]); 2319 } 2320 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2321 2322 /* read in my part of the matrix column indices */ 2323 nz = procsnz[0]; 2324 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2325 mycols = ibuf; 2326 if (size == 1) nz -= extra_rows; 2327 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2328 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2329 2330 /* read in every ones (except the last) and ship off */ 2331 for ( i=1; i<size-1; i++ ) { 2332 nz = procsnz[i]; 2333 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2334 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2335 } 2336 /* read in the stuff for the last proc */ 2337 if ( size != 1 ) { 2338 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2339 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2340 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2341 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2342 } 2343 PetscFree(cols); 2344 } else { 2345 /* determine buffer space needed for message */ 2346 nz = 0; 2347 for ( i=0; i<m; i++ ) { 2348 nz += locrowlens[i]; 2349 } 2350 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2351 mycols = ibuf; 2352 /* receive message of column indices*/ 2353 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2354 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2355 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2356 } 2357 2358 /* loop over local rows, determining number of off diagonal entries */ 2359 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2360 odlens = dlens + (rend-rstart); 2361 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2362 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2363 masked1 = mask + Mbs; 2364 masked2 = masked1 + Mbs; 2365 rowcount = 0; nzcount = 0; 2366 for ( i=0; i<mbs; i++ ) { 2367 dcount = 0; 2368 odcount = 0; 2369 for ( j=0; j<bs; j++ ) { 2370 kmax = locrowlens[rowcount]; 2371 for ( k=0; k<kmax; k++ ) { 2372 tmp = mycols[nzcount++]/bs; 2373 if (!mask[tmp]) { 2374 mask[tmp] = 1; 2375 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2376 else masked1[dcount++] = tmp; 2377 } 2378 } 2379 rowcount++; 2380 } 2381 2382 dlens[i] = dcount; 2383 odlens[i] = odcount; 2384 2385 /* zero out the mask elements we set */ 2386 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2387 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2388 } 2389 2390 /* create our matrix */ 2391 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2392 A = *newmat; 2393 MatSetOption(A,MAT_COLUMNS_SORTED); 2394 2395 if (!rank) { 2396 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2397 /* read in my part of the matrix numerical values */ 2398 nz = procsnz[0]; 2399 vals = buf; 2400 mycols = ibuf; 2401 if (size == 1) nz -= extra_rows; 2402 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2403 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2404 2405 /* insert into matrix */ 2406 jj = rstart*bs; 2407 for ( i=0; i<m; i++ ) { 2408 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2409 mycols += locrowlens[i]; 2410 vals += locrowlens[i]; 2411 jj++; 2412 } 2413 /* read in other processors (except the last one) and ship out */ 2414 for ( i=1; i<size-1; i++ ) { 2415 nz = procsnz[i]; 2416 vals = buf; 2417 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2418 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2419 } 2420 /* the last proc */ 2421 if ( size != 1 ){ 2422 nz = procsnz[i] - extra_rows; 2423 vals = buf; 2424 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2425 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2426 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2427 } 2428 PetscFree(procsnz); 2429 } else { 2430 /* receive numeric values */ 2431 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2432 2433 /* receive message of values*/ 2434 vals = buf; 2435 mycols = ibuf; 2436 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2437 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2438 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2439 2440 /* insert into matrix */ 2441 jj = rstart*bs; 2442 for ( i=0; i<m; i++ ) { 2443 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2444 mycols += locrowlens[i]; 2445 vals += locrowlens[i]; 2446 jj++; 2447 } 2448 } 2449 PetscFree(locrowlens); 2450 PetscFree(buf); 2451 PetscFree(ibuf); 2452 PetscFree(rowners); 2453 PetscFree(dlens); 2454 PetscFree(mask); 2455 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2456 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 2461 2462 #undef __FUNC__ 2463 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2464 /*@ 2465 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2466 2467 Input Parameters: 2468 . mat - the matrix 2469 . fact - factor 2470 2471 Collective on Mat 2472 2473 Level: advanced 2474 2475 Notes: 2476 This can also be set by the command line option: -mat_use_hash_table fact 2477 2478 .keywords: matrix, hashtable, factor, HT 2479 2480 .seealso: MatSetOption() 2481 @*/ 2482 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2483 { 2484 Mat_MPIBAIJ *baij; 2485 2486 PetscFunctionBegin; 2487 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2488 if (mat->type != MATMPIBAIJ) { 2489 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2490 } 2491 baij = (Mat_MPIBAIJ*) mat->data; 2492 baij->ht_fact = fact; 2493 PetscFunctionReturn(0); 2494 } 2495