1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.171 1999/05/12 03:29:54 bsmith Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 EXTERN_C_BEGIN 20 #undef __FUNC__ 21 #define __FUNC__ "MatStoreValues_MPIBAIJ" 22 int MatStoreValues_MPIBAIJ(Mat mat) 23 { 24 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 25 int ierr; 26 27 PetscFunctionBegin; 28 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 29 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 EXTERN_C_END 33 34 EXTERN_C_BEGIN 35 #undef __FUNC__ 36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ" 37 int MatRetrieveValues_MPIBAIJ(Mat mat) 38 { 39 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 40 int ierr; 41 42 PetscFunctionBegin; 43 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 44 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 45 PetscFunctionReturn(0); 46 } 47 EXTERN_C_END 48 49 /* 50 Local utility routine that creates a mapping from the global column 51 number to the local number in the off-diagonal part of the local 52 storage of the matrix. This is done in a non scable way since the 53 length of colmap equals the global matrix length. 
54 */ 55 #undef __FUNC__ 56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 57 static int CreateColmap_MPIBAIJ_Private(Mat mat) 58 { 59 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 61 int nbs = B->nbs,i,bs=B->bs,ierr; 62 63 PetscFunctionBegin; 64 #if defined (PETSC_USE_CTABLE) 65 ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 66 for ( i=0; i<nbs; i++ ){ 67 ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 68 } 69 #else 70 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 71 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 72 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 73 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 74 #endif 75 PetscFunctionReturn(0); 76 } 77 78 #define CHUNKSIZE 10 79 80 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 81 { \ 82 \ 83 brow = row/bs; \ 84 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 85 rmax = aimax[brow]; nrow = ailen[brow]; \ 86 bcol = col/bs; \ 87 ridx = row % bs; cidx = col % bs; \ 88 low = 0; high = nrow; \ 89 while (high-low > 3) { \ 90 t = (low+high)/2; \ 91 if (rp[t] > bcol) high = t; \ 92 else low = t; \ 93 } \ 94 for ( _i=low; _i<high; _i++ ) { \ 95 if (rp[_i] > bcol) break; \ 96 if (rp[_i] == bcol) { \ 97 bap = ap + bs2*_i + bs*cidx + ridx; \ 98 if (addv == ADD_VALUES) *bap += value; \ 99 else *bap = value; \ 100 goto a_noinsert; \ 101 } \ 102 } \ 103 if (a->nonew == 1) goto a_noinsert; \ 104 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 105 if (nrow >= rmax) { \ 106 /* there is no extra room in row, therefore enlarge */ \ 107 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 108 Scalar *new_a; \ 109 \ 110 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 111 \ 112 /* malloc new storage space */ \ 113 len = 
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 114 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 115 new_j = (int *) (new_a + bs2*new_nz); \ 116 new_i = new_j + new_nz; \ 117 \ 118 /* copy over old data into new slots */ \ 119 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 120 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 121 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 122 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 123 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 124 len*sizeof(int));CHKERRQ(ierr); \ 125 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 126 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 127 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 128 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 129 /* free up old matrix storage */ \ 130 PetscFree(a->a); \ 131 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = 1; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for ( ii=N; ii>=_i; ii-- ) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 155 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 \ 158 brow = row/bs; \ 159 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 160 
rmax = bimax[brow]; nrow = bilen[brow]; \ 161 bcol = col/bs; \ 162 ridx = row % bs; cidx = col % bs; \ 163 low = 0; high = nrow; \ 164 while (high-low > 3) { \ 165 t = (low+high)/2; \ 166 if (rp[t] > bcol) high = t; \ 167 else low = t; \ 168 } \ 169 for ( _i=low; _i<high; _i++ ) { \ 170 if (rp[_i] > bcol) break; \ 171 if (rp[_i] == bcol) { \ 172 bap = ap + bs2*_i + bs*cidx + ridx; \ 173 if (addv == ADD_VALUES) *bap += value; \ 174 else *bap = value; \ 175 goto b_noinsert; \ 176 } \ 177 } \ 178 if (b->nonew == 1) goto b_noinsert; \ 179 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 180 if (nrow >= rmax) { \ 181 /* there is no extra room in row, therefore enlarge */ \ 182 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 183 Scalar *new_a; \ 184 \ 185 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 186 \ 187 /* malloc new storage space */ \ 188 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 189 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 190 new_j = (int *) (new_a + bs2*new_nz); \ 191 new_i = new_j + new_nz; \ 192 \ 193 /* copy over old data into new slots */ \ 194 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 195 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 196 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 197 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 198 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 199 len*sizeof(int));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 201 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 202 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 203 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 204 /* free up old matrix storage */ \ 205 PetscFree(b->a); \ 206 if (!b->singlemalloc) 
{PetscFree(b->i);PetscFree(b->j);} \ 207 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 208 b->singlemalloc = 1; \ 209 \ 210 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 211 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 212 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 213 b->maxnz += bs2*CHUNKSIZE; \ 214 b->reallocs++; \ 215 b->nz++; \ 216 } \ 217 N = nrow++ - 1; \ 218 /* shift up all the later entries in this row */ \ 219 for ( ii=N; ii>=_i; ii-- ) { \ 220 rp[ii+1] = rp[ii]; \ 221 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 222 } \ 223 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 224 rp[_i] = bcol; \ 225 ap[bs2*_i + bs*cidx + ridx] = value; \ 226 b_noinsert:; \ 227 bilen[brow] = nrow; \ 228 } 229 230 #undef __FUNC__ 231 #define __FUNC__ "MatSetValues_MPIBAIJ" 232 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 233 { 234 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 235 Scalar value; 236 int ierr,i,j,row,col; 237 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 238 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 239 int cend_orig=baij->cend_bs,bs=baij->bs; 240 241 /* Some Variables required in the macro */ 242 Mat A = baij->A; 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 244 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 245 Scalar *aa=a->a; 246 247 Mat B = baij->B; 248 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 249 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 250 Scalar *ba=b->a; 251 252 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 253 int low,high,t,ridx,cidx,bs2=a->bs2; 254 Scalar *ap,*bap; 255 256 PetscFunctionBegin; 257 for ( i=0; i<m; i++ ) { 258 if (im[i] < 0) continue; 259 #if defined(PETSC_USE_BOPT_g) 260 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 261 #endif 262 if (im[i] >= rstart_orig && im[i] < rend_orig) { 263 row = im[i] - 
rstart_orig; 264 for ( j=0; j<n; j++ ) { 265 if (in[j] >= cstart_orig && in[j] < cend_orig){ 266 col = in[j] - cstart_orig; 267 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 268 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 269 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 270 } else if (in[j] < 0) continue; 271 #if defined(PETSC_USE_BOPT_g) 272 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 273 #endif 274 else { 275 if (mat->was_assembled) { 276 if (!baij->colmap) { 277 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 278 } 279 #if defined (PETSC_USE_CTABLE) 280 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr); 281 col = col - 1 + in[j]%bs; 282 #else 283 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 284 #endif 285 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 286 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 287 col = in[j]; 288 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 289 B = baij->B; 290 b = (Mat_SeqBAIJ *) (B)->data; 291 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 292 ba=b->a; 293 } 294 } else col = in[j]; 295 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 296 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 297 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 298 } 299 } 300 } else { 301 if ( !baij->donotstash) { 302 if (roworiented) { 303 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 304 } else { 305 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 306 } 307 } 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 315 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 316 { 317 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 318 Scalar *value,*barray=baij->barray; 319 int 
ierr,i,j,ii,jj,row,col; 320 int roworiented = baij->roworiented,rstart=baij->rstart ; 321 int rend=baij->rend,cstart=baij->cstart,stepval; 322 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 323 324 if(!barray) { 325 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 326 } 327 328 if (roworiented) { 329 stepval = (n-1)*bs; 330 } else { 331 stepval = (m-1)*bs; 332 } 333 for ( i=0; i<m; i++ ) { 334 if (im[i] < 0) continue; 335 #if defined(PETSC_USE_BOPT_g) 336 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 337 #endif 338 if (im[i] >= rstart && im[i] < rend) { 339 row = im[i] - rstart; 340 for ( j=0; j<n; j++ ) { 341 /* If NumCol = 1 then a copy is not required */ 342 if ((roworiented) && (n == 1)) { 343 barray = v + i*bs2; 344 } else if((!roworiented) && (m == 1)) { 345 barray = v + j*bs2; 346 } else { /* Here a copy is required */ 347 if (roworiented) { 348 value = v + i*(stepval+bs)*bs + j*bs; 349 } else { 350 value = v + j*(stepval+bs)*bs + i*bs; 351 } 352 for ( ii=0; ii<bs; ii++,value+=stepval ) { 353 for (jj=0; jj<bs; jj++ ) { 354 *barray++ = *value++; 355 } 356 } 357 barray -=bs2; 358 } 359 360 if (in[j] >= cstart && in[j] < cend){ 361 col = in[j] - cstart; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 else if (in[j] < 0) continue; 365 #if defined(PETSC_USE_BOPT_g) 366 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 367 #endif 368 else { 369 if (mat->was_assembled) { 370 if (!baij->colmap) { 371 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 372 } 373 374 #if defined(PETSC_USE_BOPT_g) 375 #if defined (PETSC_USE_CTABLE) 376 { int data; 377 ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 378 if((data - 1) % bs) 379 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 380 } 381 #else 382 if ((baij->colmap[in[j]] - 1) % bs) 
{SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 383 #endif 384 #endif 385 #if defined (PETSC_USE_CTABLE) 386 ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 387 col = (col - 1)/bs; 388 #else 389 col = (baij->colmap[in[j]] - 1)/bs; 390 #endif 391 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 392 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 393 col = in[j]; 394 } 395 } 396 else col = in[j]; 397 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 398 } 399 } 400 } else { 401 if (!baij->donotstash) { 402 if (roworiented) { 403 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 404 } else { 405 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 406 } 407 } 408 } 409 } 410 PetscFunctionReturn(0); 411 } 412 #define HASH_KEY 0.6180339887 413 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 414 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 415 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 416 #undef __FUNC__ 417 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 418 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 419 { 420 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 421 int ierr,i,j,row,col; 422 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 423 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 424 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 425 double tmp; 426 Scalar ** HD = baij->hd,value; 427 #if defined(PETSC_USE_BOPT_g) 428 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 429 #endif 430 431 PetscFunctionBegin; 432 433 for ( i=0; i<m; i++ ) { 434 #if defined(PETSC_USE_BOPT_g) 435 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 436 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 437 #endif 438 row = im[i]; 439 if (row >= rstart_orig && 
row < rend_orig) { 440 for ( j=0; j<n; j++ ) { 441 col = in[j]; 442 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 443 /* Look up into the Hash Table */ 444 key = (row/bs)*Nbs+(col/bs)+1; 445 h1 = HASH(size,key,tmp); 446 447 448 idx = h1; 449 #if defined(PETSC_USE_BOPT_g) 450 insert_ct++; 451 total_ct++; 452 if (HT[idx] != key) { 453 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 454 if (idx == size) { 455 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 456 if (idx == h1) { 457 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 458 } 459 } 460 } 461 #else 462 if (HT[idx] != key) { 463 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 464 if (idx == size) { 465 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 466 if (idx == h1) { 467 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 468 } 469 } 470 } 471 #endif 472 /* A HASH table entry is found, so insert the values at the correct address */ 473 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 474 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 475 } 476 } else { 477 if (!baij->donotstash) { 478 if (roworiented) { 479 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 480 } else { 481 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 482 } 483 } 484 } 485 } 486 #if defined(PETSC_USE_BOPT_g) 487 baij->ht_total_ct = total_ct; 488 baij->ht_insert_ct = insert_ct; 489 #endif 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNC__ 494 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 495 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 496 { 497 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 498 int ierr,i,j,ii,jj,row,col; 499 int roworiented = baij->roworiented,rstart=baij->rstart ; 500 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 501 int 
h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 502 double tmp; 503 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 504 #if defined(PETSC_USE_BOPT_g) 505 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 506 #endif 507 508 PetscFunctionBegin; 509 510 if (roworiented) { 511 stepval = (n-1)*bs; 512 } else { 513 stepval = (m-1)*bs; 514 } 515 for ( i=0; i<m; i++ ) { 516 #if defined(PETSC_USE_BOPT_g) 517 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 518 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 519 #endif 520 row = im[i]; 521 v_t = v + i*bs2; 522 if (row >= rstart && row < rend) { 523 for ( j=0; j<n; j++ ) { 524 col = in[j]; 525 526 /* Look up into the Hash Table */ 527 key = row*Nbs+col+1; 528 h1 = HASH(size,key,tmp); 529 530 idx = h1; 531 #if defined(PETSC_USE_BOPT_g) 532 total_ct++; 533 insert_ct++; 534 if (HT[idx] != key) { 535 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 536 if (idx == size) { 537 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 538 if (idx == h1) { 539 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 540 } 541 } 542 } 543 #else 544 if (HT[idx] != key) { 545 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 546 if (idx == size) { 547 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 548 if (idx == h1) { 549 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 550 } 551 } 552 } 553 #endif 554 baij_a = HD[idx]; 555 if (roworiented) { 556 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 557 /* value = v + (i*(stepval+bs)+j)*bs; */ 558 value = v_t; 559 v_t += bs; 560 if (addv == ADD_VALUES) { 561 for ( ii=0; ii<bs; ii++,value+=stepval) { 562 for ( jj=ii; jj<bs2; jj+=bs ) { 563 baij_a[jj] += *value++; 564 } 565 } 566 } else { 567 for ( ii=0; ii<bs; ii++,value+=stepval) { 568 for ( jj=ii; jj<bs2; jj+=bs ) { 569 baij_a[jj] = *value++; 570 } 571 } 572 } 573 } else { 574 value = v + j*(stepval+bs)*bs + 
i*bs; 575 if (addv == ADD_VALUES) { 576 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 577 for ( jj=0; jj<bs; jj++ ) { 578 baij_a[jj] += *value++; 579 } 580 } 581 } else { 582 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 583 for ( jj=0; jj<bs; jj++ ) { 584 baij_a[jj] = *value++; 585 } 586 } 587 } 588 } 589 } 590 } else { 591 if (!baij->donotstash) { 592 if (roworiented) { 593 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 594 } else { 595 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 596 } 597 } 598 } 599 } 600 #if defined(PETSC_USE_BOPT_g) 601 baij->ht_total_ct = total_ct; 602 baij->ht_insert_ct = insert_ct; 603 #endif 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNC__ 608 #define __FUNC__ "MatGetValues_MPIBAIJ" 609 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 612 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 613 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 614 615 PetscFunctionBegin; 616 for ( i=0; i<m; i++ ) { 617 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 618 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 619 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 620 row = idxm[i] - bsrstart; 621 for ( j=0; j<n; j++ ) { 622 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 623 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 624 if (idxn[j] >= bscstart && idxn[j] < bscend){ 625 col = idxn[j] - bscstart; 626 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 627 } else { 628 if (!baij->colmap) { 629 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 630 } 631 #if defined (PETSC_USE_CTABLE) 632 ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 633 data --; 634 #else 635 data = 
baij->colmap[idxn[j]/bs]-1; 636 #endif 637 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 638 else { 639 col = data + idxn[j]%bs; 640 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 641 } 642 } 643 } 644 } else { 645 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 646 } 647 } 648 PetscFunctionReturn(0); 649 } 650 651 #undef __FUNC__ 652 #define __FUNC__ "MatNorm_MPIBAIJ" 653 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 654 { 655 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 656 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 657 int ierr, i,bs2=baij->bs2; 658 double sum = 0.0; 659 Scalar *v; 660 661 PetscFunctionBegin; 662 if (baij->size == 1) { 663 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 664 } else { 665 if (type == NORM_FROBENIUS) { 666 v = amat->a; 667 for (i=0; i<amat->nz*bs2; i++ ) { 668 #if defined(PETSC_USE_COMPLEX) 669 sum += PetscReal(PetscConj(*v)*(*v)); v++; 670 #else 671 sum += (*v)*(*v); v++; 672 #endif 673 } 674 v = bmat->a; 675 for (i=0; i<bmat->nz*bs2; i++ ) { 676 #if defined(PETSC_USE_COMPLEX) 677 sum += PetscReal(PetscConj(*v)*(*v)); v++; 678 #else 679 sum += (*v)*(*v); v++; 680 #endif 681 } 682 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 683 *norm = sqrt(*norm); 684 } else { 685 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 686 } 687 } 688 PetscFunctionReturn(0); 689 } 690 691 692 /* 693 Creates the hash table, and sets the table 694 This table is created only once. 695 If new entried need to be added to the matrix 696 then the hash table has to be destroyed and 697 recreated. 
698 */ 699 #undef __FUNC__ 700 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 701 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 702 { 703 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 704 Mat A = baij->A, B=baij->B; 705 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 706 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 707 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 708 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 709 int *HT,key; 710 Scalar **HD; 711 double tmp; 712 #if defined(PETSC_USE_BOPT_g) 713 int ct=0,max=0; 714 #endif 715 716 PetscFunctionBegin; 717 baij->ht_size=(int)(factor*nz); 718 size = baij->ht_size; 719 720 if (baij->ht) { 721 PetscFunctionReturn(0); 722 } 723 724 /* Allocate Memory for Hash Table */ 725 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 726 baij->ht = (int*)(baij->hd + size); 727 HD = baij->hd; 728 HT = baij->ht; 729 730 731 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 732 733 734 /* Loop Over A */ 735 for ( i=0; i<a->mbs; i++ ) { 736 for ( j=ai[i]; j<ai[i+1]; j++ ) { 737 row = i+rstart; 738 col = aj[j]+cstart; 739 740 key = row*Nbs + col + 1; 741 h1 = HASH(size,key,tmp); 742 for ( k=0; k<size; k++ ){ 743 if (HT[(h1+k)%size] == 0.0) { 744 HT[(h1+k)%size] = key; 745 HD[(h1+k)%size] = a->a + j*bs2; 746 break; 747 #if defined(PETSC_USE_BOPT_g) 748 } else { 749 ct++; 750 #endif 751 } 752 } 753 #if defined(PETSC_USE_BOPT_g) 754 if (k> max) max = k; 755 #endif 756 } 757 } 758 /* Loop Over B */ 759 for ( i=0; i<b->mbs; i++ ) { 760 for ( j=bi[i]; j<bi[i+1]; j++ ) { 761 row = i+rstart; 762 col = garray[bj[j]]; 763 key = row*Nbs + col + 1; 764 h1 = HASH(size,key,tmp); 765 for ( k=0; k<size; k++ ){ 766 if (HT[(h1+k)%size] == 0.0) { 767 HT[(h1+k)%size] = key; 768 HD[(h1+k)%size] = b->a + j*bs2; 769 break; 770 #if defined(PETSC_USE_BOPT_g) 771 } else { 772 ct++; 773 #endif 774 } 775 } 776 
#if defined(PETSC_USE_BOPT_g) 777 if (k> max) max = k; 778 #endif 779 } 780 } 781 782 /* Print Summary */ 783 #if defined(PETSC_USE_BOPT_g) 784 for ( i=0,j=0; i<size; i++) 785 if (HT[i]) {j++;} 786 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 787 (j== 0)? 0.0:((double)(ct+j))/j,max); 788 #endif 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNC__ 793 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 794 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 795 { 796 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 797 int ierr,nstash,reallocs; 798 InsertMode addv; 799 800 PetscFunctionBegin; 801 if (baij->donotstash) { 802 PetscFunctionReturn(0); 803 } 804 805 /* make sure all processors are either in INSERTMODE or ADDMODE */ 806 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 807 if (addv == (ADD_VALUES|INSERT_VALUES)) { 808 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 809 } 810 mat->insertmode = addv; /* in case this processor had no cache */ 811 812 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 813 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 815 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 816 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 817 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNC__ 822 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 823 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 826 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 827 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 828 int 
*row,*col,other_disassembled,r1,r2,r3; 829 Scalar *val; 830 InsertMode addv = mat->insertmode; 831 832 PetscFunctionBegin; 833 if (!baij->donotstash) { 834 while (1) { 835 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 836 if (!flg) break; 837 838 for ( i=0; i<n; ) { 839 /* Now identify the consecutive vals belonging to the same row */ 840 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 841 if (j < n) ncols = j-i; 842 else ncols = n-i; 843 /* Now assemble all these values with a single function call */ 844 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 845 i = j; 846 } 847 } 848 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 849 /* Now process the block-stash. Since the values are stashed column-oriented, 850 set the roworiented flag to column oriented, and after MatSetValues() 851 restore the original flags */ 852 r1 = baij->roworiented; 853 r2 = a->roworiented; 854 r3 = b->roworiented; 855 baij->roworiented = 0; 856 a->roworiented = 0; 857 b->roworiented = 0; 858 while (1) { 859 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 860 if (!flg) break; 861 862 for ( i=0; i<n; ) { 863 /* Now identify the consecutive vals belonging to the same row */ 864 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 865 if (j < n) ncols = j-i; 866 else ncols = n-i; 867 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 868 i = j; 869 } 870 } 871 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 872 baij->roworiented = r1; 873 a->roworiented = r2; 874 b->roworiented = r3; 875 } 876 877 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 878 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 879 880 /* determine if any processor has disassembled, if so we must 881 also disassemble ourselfs, in order that we may reassemble. 
*/ 882 /* 883 if nonzero structure of submatrix B cannot change then we know that 884 no processor disassembled thus we can skip this stuff 885 */ 886 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 887 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 888 if (mat->was_assembled && !other_disassembled) { 889 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 890 } 891 } 892 893 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 894 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 898 899 #if defined(PETSC_USE_BOPT_g) 900 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 901 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 902 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 903 baij->ht_total_ct = 0; 904 baij->ht_insert_ct = 0; 905 } 906 #endif 907 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 908 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 909 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 910 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 911 } 912 913 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 914 PetscFunctionReturn(0); 915 } 916 917 #undef __FUNC__ 918 #define __FUNC__ "MatView_MPIBAIJ_Binary" 919 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 920 { 921 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 922 int ierr; 923 924 PetscFunctionBegin; 925 if (baij->size == 1) { 926 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 927 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNC__ 932 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 933 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 934 { 935 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 936 int ierr, 
format,bs = baij->bs, size = baij->size, rank = baij->rank; 937 FILE *fd; 938 ViewerType vtype; 939 940 PetscFunctionBegin; 941 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 942 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 943 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 944 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 945 MatInfo info; 946 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 947 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 948 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 949 ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr); 950 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 951 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 952 baij->bs,(int)info.memory); 953 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 954 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 955 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 956 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 957 fflush(fd); 958 ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr); 959 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 962 ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 } 966 967 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 968 Draw draw; 969 PetscTruth isnull; 970 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 971 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 972 } 973 974 if (size == 1) { 975 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 976 } else { 977 /* assemble the entire matrix onto first processor. 
*/ 978 Mat A; 979 Mat_SeqBAIJ *Aloc; 980 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 981 int mbs = baij->mbs; 982 Scalar *a; 983 984 if (!rank) { 985 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 986 } else { 987 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 988 } 989 PLogObjectParent(mat,A); 990 991 /* copy over the A part */ 992 Aloc = (Mat_SeqBAIJ*) baij->A->data; 993 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 994 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 995 996 for ( i=0; i<mbs; i++ ) { 997 rvals[0] = bs*(baij->rstart + i); 998 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 999 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1000 col = (baij->cstart+aj[j])*bs; 1001 for (k=0; k<bs; k++ ) { 1002 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1003 col++; a += bs; 1004 } 1005 } 1006 } 1007 /* copy over the B part */ 1008 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1009 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1010 for ( i=0; i<mbs; i++ ) { 1011 rvals[0] = bs*(baij->rstart + i); 1012 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1013 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1014 col = baij->garray[aj[j]]*bs; 1015 for (k=0; k<bs; k++ ) { 1016 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1017 col++; a += bs; 1018 } 1019 } 1020 } 1021 PetscFree(rvals); 1022 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1023 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 /* 1025 Everyone has to call to draw the matrix since the graphics waits are 1026 synchronized across all processors that share the Draw object 1027 */ 1028 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1029 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1030 } 1031 ierr = MatDestroy(A);CHKERRQ(ierr); 1032 } 1033 PetscFunctionReturn(0); 1034 } 1035 1036 1037 1038 #undef __FUNC__ 1039 #define __FUNC__ 
"MatView_MPIBAIJ" 1040 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1041 { 1042 int ierr; 1043 ViewerType vtype; 1044 1045 PetscFunctionBegin; 1046 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1047 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1048 PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) { 1049 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1050 /* 1051 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1052 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1053 */ 1054 } else { 1055 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1056 } 1057 PetscFunctionReturn(0); 1058 } 1059 1060 #undef __FUNC__ 1061 #define __FUNC__ "MatDestroy_MPIBAIJ" 1062 int MatDestroy_MPIBAIJ(Mat mat) 1063 { 1064 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1065 int ierr; 1066 1067 PetscFunctionBegin; 1068 if (--mat->refct > 0) PetscFunctionReturn(0); 1069 1070 if (mat->mapping) { 1071 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1072 } 1073 if (mat->bmapping) { 1074 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1075 } 1076 if (mat->rmap) { 1077 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1078 } 1079 if (mat->cmap) { 1080 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1081 } 1082 #if defined(PETSC_USE_LOG) 1083 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1084 #endif 1085 1086 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1087 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1088 1089 PetscFree(baij->rowners); 1090 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1091 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1092 #if defined (PETSC_USE_CTABLE) 1093 if (baij->colmap) TableDelete(baij->colmap); 1094 #else 1095 if (baij->colmap) PetscFree(baij->colmap); 1096 #endif 1097 if (baij->garray) PetscFree(baij->garray); 1098 if (baij->lvec) VecDestroy(baij->lvec); 1099 if (baij->Mvctx) 
VecScatterDestroy(baij->Mvctx); 1100 if (baij->rowvalues) PetscFree(baij->rowvalues); 1101 if (baij->barray) PetscFree(baij->barray); 1102 if (baij->hd) PetscFree(baij->hd); 1103 PetscFree(baij); 1104 PLogObjectDestroy(mat); 1105 PetscHeaderDestroy(mat); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNC__ 1110 #define __FUNC__ "MatMult_MPIBAIJ" 1111 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1112 { 1113 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1114 int ierr, nt; 1115 1116 PetscFunctionBegin; 1117 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1118 if (nt != a->n) { 1119 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1120 } 1121 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1122 if (nt != a->m) { 1123 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1124 } 1125 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1126 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1127 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1128 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1129 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNC__ 1134 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1135 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1136 { 1137 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1138 int ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1142 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1144 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNC__ 1149 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1150 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1151 { 1152 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *) A->data; 1153 int ierr; 1154 1155 PetscFunctionBegin; 1156 /* do nondiagonal part */ 1157 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* inserted in yy until the next line, which is true for my implementation*/ 1164 /* but is not perhaps always true. */ 1165 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNC__ 1170 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1171 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1172 { 1173 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1174 int ierr; 1175 1176 PetscFunctionBegin; 1177 /* do nondiagonal part */ 1178 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1179 /* send it on its way */ 1180 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1181 /* do local part */ 1182 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1183 /* receive remote parts: note this assumes the values are not actually */ 1184 /* inserted in yy until the next line, which is true for my implementation*/ 1185 /* but is not perhaps always true. 
*/ 1186 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 /* 1191 This only works correctly for square matrices where the subblock A->A is the 1192 diagonal block 1193 */ 1194 #undef __FUNC__ 1195 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1196 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1197 { 1198 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1199 int ierr; 1200 1201 PetscFunctionBegin; 1202 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1203 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNC__ 1208 #define __FUNC__ "MatScale_MPIBAIJ" 1209 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1210 { 1211 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1212 int ierr; 1213 1214 PetscFunctionBegin; 1215 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1216 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNC__ 1221 #define __FUNC__ "MatGetSize_MPIBAIJ" 1222 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1223 { 1224 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1225 1226 PetscFunctionBegin; 1227 if (m) *m = mat->M; 1228 if (n) *n = mat->N; 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNC__ 1233 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1234 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1235 { 1236 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1237 1238 PetscFunctionBegin; 1239 *m = mat->m; *n = mat->n; 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNC__ 1244 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1245 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1246 { 1247 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1248 1249 PetscFunctionBegin; 1250 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNC__ 1255 #define __FUNC__ "MatGetRow_MPIBAIJ" 1256 int MatGetRow_MPIBAIJ(Mat 
matin,int row,int *nz,int **idx,Scalar **v) 1257 { 1258 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1259 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1260 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1261 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1262 int *cmap, *idx_p,cstart = mat->cstart; 1263 1264 PetscFunctionBegin; 1265 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1266 mat->getrowactive = PETSC_TRUE; 1267 1268 if (!mat->rowvalues && (idx || v)) { 1269 /* 1270 allocate enough space to hold information from the longest row. 1271 */ 1272 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1273 int max = 1,mbs = mat->mbs,tmp; 1274 for ( i=0; i<mbs; i++ ) { 1275 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1276 if (max < tmp) { max = tmp; } 1277 } 1278 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1279 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1280 } 1281 1282 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1283 lrow = row - brstart; 1284 1285 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1286 if (!v) {pvA = 0; pvB = 0;} 1287 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1288 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1289 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1290 nztot = nzA + nzB; 1291 1292 cmap = mat->garray; 1293 if (v || idx) { 1294 if (nztot) { 1295 /* Sort by increasing column numbers, assuming A and B already sorted */ 1296 int imark = -1; 1297 if (v) { 1298 *v = v_p = mat->rowvalues; 1299 for ( i=0; i<nzB; i++ ) { 1300 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1301 else break; 1302 } 1303 imark = i; 1304 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1305 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1306 } 1307 if (idx) { 
1308 *idx = idx_p = mat->rowindices; 1309 if (imark > -1) { 1310 for ( i=0; i<imark; i++ ) { 1311 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1312 } 1313 } else { 1314 for ( i=0; i<nzB; i++ ) { 1315 if (cmap[cworkB[i]/bs] < cstart) 1316 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1317 else break; 1318 } 1319 imark = i; 1320 } 1321 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1322 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1323 } 1324 } else { 1325 if (idx) *idx = 0; 1326 if (v) *v = 0; 1327 } 1328 } 1329 *nz = nztot; 1330 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1331 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNC__ 1336 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1337 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1338 { 1339 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1340 1341 PetscFunctionBegin; 1342 if (baij->getrowactive == PETSC_FALSE) { 1343 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1344 } 1345 baij->getrowactive = PETSC_FALSE; 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNC__ 1350 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1351 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1352 { 1353 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1354 1355 PetscFunctionBegin; 1356 *bs = baij->bs; 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNC__ 1361 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1362 int MatZeroEntries_MPIBAIJ(Mat A) 1363 { 1364 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1365 int ierr; 1366 1367 PetscFunctionBegin; 1368 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1369 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNC__ 1374 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1375 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1376 { 1377 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *) matin->data; 1378 Mat A = a->A, B = a->B; 1379 int ierr; 1380 double isend[5], irecv[5]; 1381 1382 PetscFunctionBegin; 1383 info->block_size = (double)a->bs; 1384 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1385 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1386 isend[3] = info->memory; isend[4] = info->mallocs; 1387 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1388 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1389 isend[3] += info->memory; isend[4] += info->mallocs; 1390 if (flag == MAT_LOCAL) { 1391 info->nz_used = isend[0]; 1392 info->nz_allocated = isend[1]; 1393 info->nz_unneeded = isend[2]; 1394 info->memory = isend[3]; 1395 info->mallocs = isend[4]; 1396 } else if (flag == MAT_GLOBAL_MAX) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else if (flag == MAT_GLOBAL_SUM) { 1404 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1405 info->nz_used = irecv[0]; 1406 info->nz_allocated = irecv[1]; 1407 info->nz_unneeded = irecv[2]; 1408 info->memory = irecv[3]; 1409 info->mallocs = irecv[4]; 1410 } else { 1411 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1412 } 1413 info->rows_global = (double)a->M; 1414 info->columns_global = (double)a->N; 1415 info->rows_local = (double)a->m; 1416 info->columns_local = (double)a->N; 1417 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1418 info->fill_ratio_needed = 0; 1419 info->factor_mallocs = 0; 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNC__ 1424 #define __FUNC__ "MatSetOption_MPIBAIJ" 1425 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1426 { 1427 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1428 int ierr; 1429 1430 PetscFunctionBegin; 1431 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1432 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1433 op == MAT_COLUMNS_UNSORTED || 1434 op == MAT_COLUMNS_SORTED || 1435 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1436 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1437 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1438 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1439 } else if (op == MAT_ROW_ORIENTED) { 1440 a->roworiented = 1; 1441 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1442 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1443 } else if (op == MAT_ROWS_SORTED || 1444 op == MAT_ROWS_UNSORTED || 1445 op == MAT_SYMMETRIC || 1446 op == MAT_STRUCTURALLY_SYMMETRIC || 1447 op == MAT_YES_NEW_DIAGONALS || 1448 op == MAT_USE_HASH_TABLE) { 1449 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1450 } else if (op == MAT_COLUMN_ORIENTED) { 1451 a->roworiented = 0; 1452 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1453 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1454 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1455 a->donotstash = 1; 1456 } else if (op == MAT_NO_NEW_DIAGONALS) { 1457 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1458 } else if (op == MAT_USE_HASH_TABLE) { 1459 a->ht_flag = 1; 1460 } else { 1461 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNC__ 1467 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1468 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1469 { 1470 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1471 Mat_SeqBAIJ *Aloc; 1472 Mat B; 1473 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1474 int bs=baij->bs,mbs=baij->mbs; 1475 Scalar *a; 1476 1477 PetscFunctionBegin; 1478 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1479 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1480 CHKERRQ(ierr); 1481 1482 /* copy over the A part */ 1483 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1484 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1485 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1486 1487 for ( i=0; i<mbs; i++ ) { 1488 rvals[0] = bs*(baij->rstart + i); 1489 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1490 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1491 col = (baij->cstart+aj[j])*bs; 1492 for (k=0; k<bs; k++ ) { 1493 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1494 col++; a += bs; 1495 } 1496 } 1497 } 1498 /* copy over the B part */ 1499 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1500 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1501 for ( i=0; i<mbs; i++ ) { 1502 rvals[0] = bs*(baij->rstart + i); 1503 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1504 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1505 col = baij->garray[aj[j]]*bs; 1506 for (k=0; k<bs; k++ ) { 1507 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1508 col++; a += bs; 1509 } 1510 } 1511 } 1512 PetscFree(rvals); 1513 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1514 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1515 1516 if (matout != PETSC_NULL) { 1517 *matout = B; 1518 } else { 1519 PetscOps *Abops; 1520 MatOps Aops; 1521 1522 /* This isn't really an in-place transpose .... but free data structures from baij */ 1523 PetscFree(baij->rowners); 1524 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1525 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1526 #if defined (PETSC_USE_CTABLE) 1527 if (baij->colmap) TableDelete(baij->colmap); 1528 #else 1529 if (baij->colmap) PetscFree(baij->colmap); 1530 #endif 1531 if (baij->garray) PetscFree(baij->garray); 1532 if (baij->lvec) VecDestroy(baij->lvec); 1533 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1534 PetscFree(baij); 1535 1536 /* 1537 This is horrible, horrible code. We need to keep the 1538 A pointers for the bops and ops but copy everything 1539 else from C. 
1540 */ 1541 Abops = A->bops; 1542 Aops = A->ops; 1543 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1544 A->bops = Abops; 1545 A->ops = Aops; 1546 1547 PetscHeaderDestroy(B); 1548 } 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNC__ 1553 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1554 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1555 { 1556 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1557 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1558 int ierr,s1,s2,s3; 1559 1560 PetscFunctionBegin; 1561 if (ll) { 1562 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1563 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 1564 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1565 ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr); 1566 ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr); 1567 } 1568 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #undef __FUNC__ 1573 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1574 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1575 { 1576 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1577 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1578 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1579 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1580 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1581 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1582 MPI_Comm comm = A->comm; 1583 MPI_Request *send_waits,*recv_waits; 1584 MPI_Status recv_status,*send_status; 1585 IS istmp; 1586 1587 PetscFunctionBegin; 1588 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1589 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1590 1591 /* first count number of contributors to each processor */ 1592 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 1593 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1594 procs = nprocs + size; 1595 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1596 for ( i=0; 
i<N; i++ ) { 1597 idx = rows[i]; 1598 found = 0; 1599 for ( j=0; j<size; j++ ) { 1600 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1601 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1602 } 1603 } 1604 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1605 } 1606 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1607 1608 /* inform other processors of number of messages and max length*/ 1609 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 1610 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1611 nrecvs = work[rank]; 1612 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1613 nmax = work[rank]; 1614 PetscFree(work); 1615 1616 /* post receives: */ 1617 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1618 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1619 for ( i=0; i<nrecvs; i++ ) { 1620 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1621 } 1622 1623 /* do sends: 1624 1) starts[i] gives the starting index in svalues for stuff going to 1625 the ith processor 1626 */ 1627 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 1628 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1629 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 1630 starts[0] = 0; 1631 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1632 for ( i=0; i<N; i++ ) { 1633 svalues[starts[owner[i]]++] = rows[i]; 1634 } 1635 ISRestoreIndices(is,&rows); 1636 1637 starts[0] = 0; 1638 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1639 count = 0; 1640 for ( i=0; i<size; i++ ) { 1641 if (procs[i]) { 1642 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1643 } 1644 } 1645 PetscFree(starts); 1646 
1647 base = owners[rank]*bs; 1648 1649 /* wait on receives */ 1650 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 1651 source = lens + nrecvs; 1652 count = nrecvs; slen = 0; 1653 while (count) { 1654 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1655 /* unpack receives into our local space */ 1656 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1657 source[imdex] = recv_status.MPI_SOURCE; 1658 lens[imdex] = n; 1659 slen += n; 1660 count--; 1661 } 1662 PetscFree(recv_waits); 1663 1664 /* move the data into the send scatter */ 1665 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 1666 count = 0; 1667 for ( i=0; i<nrecvs; i++ ) { 1668 values = rvalues + i*nmax; 1669 for ( j=0; j<lens[i]; j++ ) { 1670 lrows[count++] = values[j] - base; 1671 } 1672 } 1673 PetscFree(rvalues); PetscFree(lens); 1674 PetscFree(owner); PetscFree(nprocs); 1675 1676 /* actually zap the local rows */ 1677 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1678 PLogObjectParent(A,istmp); 1679 1680 /* 1681 Zero the required rows. If the "diagonal block" of the matrix 1682 is square and the user wishes to set the diagonal we use seperate 1683 code so that MatSetValues() is not called for each diagonal allocating 1684 new memory, thus calling lots of mallocs and slowing things down. 
1685 1686 Contributed by: Mathew Knepley 1687 */ 1688 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1689 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 1690 if (diag && (l->A->M == l->A->N)) { 1691 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 1692 } else if (diag) { 1693 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1694 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1695 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1696 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1697 } 1698 for ( i = 0; i < slen; i++ ) { 1699 row = lrows[i] + rstart_bs; 1700 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1701 } 1702 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1703 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1704 } else { 1705 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1706 } 1707 1708 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1709 PetscFree(lrows); 1710 1711 /* wait on sends */ 1712 if (nsends) { 1713 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1714 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1715 PetscFree(send_status); 1716 } 1717 PetscFree(send_waits); PetscFree(svalues); 1718 1719 PetscFunctionReturn(0); 1720 } 1721 1722 #undef __FUNC__ 1723 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1724 int MatPrintHelp_MPIBAIJ(Mat A) 1725 { 1726 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1727 MPI_Comm comm = A->comm; 1728 static int called = 0; 1729 int ierr; 1730 1731 PetscFunctionBegin; 1732 if (!a->rank) { 1733 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1734 } 1735 if (called) {PetscFunctionReturn(0);} else called = 1; 1736 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1737 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for 
efficient matrix assembly\n");CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNC__ 1742 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1743 int MatSetUnfactored_MPIBAIJ(Mat A) 1744 { 1745 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1746 int ierr; 1747 1748 PetscFunctionBegin; 1749 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 1753 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1754 1755 #undef __FUNC__ 1756 #define __FUNC__ "MatEqual_MPIBAIJ" 1757 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag) 1758 { 1759 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data; 1760 Mat a, b, c, d; 1761 PetscTruth flg; 1762 int ierr; 1763 1764 PetscFunctionBegin; 1765 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1766 a = matA->A; b = matA->B; 1767 c = matB->A; d = matB->B; 1768 1769 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1770 if (flg == PETSC_TRUE) { 1771 ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1772 } 1773 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1774 PetscFunctionReturn(0); 1775 } 1776 1777 /* -------------------------------------------------------------------*/ 1778 static struct _MatOps MatOps_Values = { 1779 MatSetValues_MPIBAIJ, 1780 MatGetRow_MPIBAIJ, 1781 MatRestoreRow_MPIBAIJ, 1782 MatMult_MPIBAIJ, 1783 MatMultAdd_MPIBAIJ, 1784 MatMultTrans_MPIBAIJ, 1785 MatMultTransAdd_MPIBAIJ, 1786 0, 1787 0, 1788 0, 1789 0, 1790 0, 1791 0, 1792 0, 1793 MatTranspose_MPIBAIJ, 1794 MatGetInfo_MPIBAIJ, 1795 MatEqual_MPIBAIJ, 1796 MatGetDiagonal_MPIBAIJ, 1797 MatDiagonalScale_MPIBAIJ, 1798 MatNorm_MPIBAIJ, 1799 MatAssemblyBegin_MPIBAIJ, 1800 MatAssemblyEnd_MPIBAIJ, 1801 0, 1802 MatSetOption_MPIBAIJ, 1803 MatZeroEntries_MPIBAIJ, 1804 MatZeroRows_MPIBAIJ, 1805 0, 1806 0, 1807 0, 1808 0, 1809 MatGetSize_MPIBAIJ, 1810 MatGetLocalSize_MPIBAIJ, 1811 MatGetOwnershipRange_MPIBAIJ, 1812 0, 1813 0, 1814 0, 1815 0, 
1816 MatDuplicate_MPIBAIJ, 1817 0, 1818 0, 1819 0, 1820 0, 1821 0, 1822 MatGetSubMatrices_MPIBAIJ, 1823 MatIncreaseOverlap_MPIBAIJ, 1824 MatGetValues_MPIBAIJ, 1825 0, 1826 MatPrintHelp_MPIBAIJ, 1827 MatScale_MPIBAIJ, 1828 0, 1829 0, 1830 0, 1831 MatGetBlockSize_MPIBAIJ, 1832 0, 1833 0, 1834 0, 1835 0, 1836 0, 1837 0, 1838 MatSetUnfactored_MPIBAIJ, 1839 0, 1840 MatSetValuesBlocked_MPIBAIJ, 1841 0, 1842 0, 1843 0, 1844 MatGetMaps_Petsc}; 1845 1846 1847 EXTERN_C_BEGIN 1848 #undef __FUNC__ 1849 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1850 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1851 { 1852 PetscFunctionBegin; 1853 *a = ((Mat_MPIBAIJ *)A->data)->A; 1854 *iscopy = PETSC_FALSE; 1855 PetscFunctionReturn(0); 1856 } 1857 EXTERN_C_END 1858 1859 #undef __FUNC__ 1860 #define __FUNC__ "MatCreateMPIBAIJ" 1861 /*@C 1862 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1863 (block compressed row). For good matrix assembly performance 1864 the user should preallocate the matrix storage by setting the parameters 1865 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1866 performance can be increased by more than a factor of 50. 1867 1868 Collective on MPI_Comm 1869 1870 Input Parameters: 1871 + comm - MPI communicator 1872 . bs - size of blockk 1873 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1874 This value should be the same as the local size used in creating the 1875 y vector for the matrix-vector product y = Ax. 1876 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1877 This value should be the same as the local size used in creating the 1878 x vector for the matrix-vector product y = Ax. 1879 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1880 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1881 . 
d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor 
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.
Likewise, specify preallocated storage for the 1919 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1920 1921 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1922 the figure below we depict these three local rows and all columns (0-11). 1923 1924 .vb 1925 0 1 2 3 4 5 6 7 8 9 10 11 1926 ------------------- 1927 row 3 | o o o d d d o o o o o o 1928 row 4 | o o o d d d o o o o o o 1929 row 5 | o o o d d d o o o o o o 1930 ------------------- 1931 .ve 1932 1933 Thus, any entries in the d locations are stored in the d (diagonal) 1934 submatrix, and any entries in the o locations are stored in the 1935 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1936 stored simply in the MATSEQBAIJ format for compressed row storage. 1937 1938 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1939 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1940 In general, for PDE problems in which most nonzeros are near the diagonal, 1941 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1942 or you will get TERRIBLE performance; see the users' manual chapter on 1943 matrices. 
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int         flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* allow the block size to be overridden from the options database */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* on a single process default to the sequential BAIJ format, unless the
     parallel structures were explicitly requested via -mat_mpibaij/-mat_mpi */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* per-row preallocation arrays only make sense when the local row count is known */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win and the
     global size is recomputed from them below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* derive missing global sizes by summing the local sizes over the communicator */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* derive missing local sizes: each rank gets Mbs/size block rows, plus one
     extra block row on the first (Mbs mod size) ranks */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* cache the point and block dimensions in both the header and the impl struct */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds
     rowners, cowners and rowners_bs back to back */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* gather every rank's block-row count, then prefix-sum into ownership offsets
     (rowners[1] already holds rank 0's count, so the scan starts at i=2) */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;   /* same table in point (non-block) units */
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  /* identical construction for the column ownership table */
  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  /* sequential diagonal (m x n) and off-diagonal blocks; the off-diagonal block
     is created with the full global column count N -- presumably compacted to
     just the needed ghost columns during assembly (see MatSetUpMultiply_MPIBAIJ)
     -- TODO confirm */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht          = 0;
  b->hd          = 0;
  b->ht_size     = 0;
  b->ht_flag     = 0;
  b->ht_fact     = 0;
  b->ht_total_ct = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table assembly: -mat_use_hash_table [fact] */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;   /* reject non-sensical factors; fall back to default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* publish the type-specific entry points so the generic Mat interface can find them */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIBAIJ",
                                    (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIBAIJ",
                                    (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIBAIJ",
                                    (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIBAIJ"
/*
   Creates a new parallel BAIJ matrix with the same parallel layout as matin;
   cpvalues (forwarded to MatDuplicate() on the two sequential blocks)
   controls whether the numerical values are copied as well.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIBAIJ;
  mat->ops->view    = MatView_MPIBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy over the size and layout information from the original */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = 
oldmat->bs2; 2156 a->mbs = oldmat->mbs; 2157 a->nbs = oldmat->nbs; 2158 a->Mbs = oldmat->Mbs; 2159 a->Nbs = oldmat->Nbs; 2160 2161 a->rstart = oldmat->rstart; 2162 a->rend = oldmat->rend; 2163 a->cstart = oldmat->cstart; 2164 a->cend = oldmat->cend; 2165 a->size = oldmat->size; 2166 a->rank = oldmat->rank; 2167 a->donotstash = oldmat->donotstash; 2168 a->roworiented = oldmat->roworiented; 2169 a->rowindices = 0; 2170 a->rowvalues = 0; 2171 a->getrowactive = PETSC_FALSE; 2172 a->barray = 0; 2173 a->rstart_bs = oldmat->rstart_bs; 2174 a->rend_bs = oldmat->rend_bs; 2175 a->cstart_bs = oldmat->cstart_bs; 2176 a->cend_bs = oldmat->cend_bs; 2177 2178 /* hash table stuff */ 2179 a->ht = 0; 2180 a->hd = 0; 2181 a->ht_size = 0; 2182 a->ht_flag = oldmat->ht_flag; 2183 a->ht_fact = oldmat->ht_fact; 2184 a->ht_total_ct = 0; 2185 a->ht_insert_ct = 0; 2186 2187 2188 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2189 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2190 a->cowners = a->rowners + a->size + 2; 2191 a->rowners_bs = a->cowners + a->size + 2; 2192 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2193 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2194 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2195 if (oldmat->colmap) { 2196 #if defined (PETSC_USE_CTABLE) 2197 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2198 #else 2199 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2200 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2201 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2202 #endif 2203 } else a->colmap = 0; 2204 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2205 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2206 PLogObjectMemory(mat,len*sizeof(int)); 2207 ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2208 } else a->garray = 0; 2209 2210 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2211 PLogObjectParent(mat,a->lvec); 2212 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2213 2214 PLogObjectParent(mat,a->Mvctx); 2215 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2216 PLogObjectParent(mat,a->A); 2217 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2218 PLogObjectParent(mat,a->B); 2219 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2220 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2221 if (flg) { 2222 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2223 } 2224 *newmat = mat; 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #include "sys.h" 2229 2230 #undef __FUNC__ 2231 #define __FUNC__ "MatLoad_MPIBAIJ" 2232 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2233 { 2234 Mat A; 2235 int i, nz, ierr, j,rstart, rend, fd; 2236 Scalar *vals,*buf; 2237 MPI_Comm comm = ((PetscObject)viewer)->comm; 2238 MPI_Status status; 2239 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2240 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2241 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2242 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2243 int dcount,kmax,k,nzcount,tmp; 2244 2245 PetscFunctionBegin; 2246 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2247 2248 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2250 if (!rank) { 2251 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2252 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2253 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2254 if (header[3] < 0) { 2255 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as 
MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);   /* = bs - (M mod bs); equals bs when M is already divisible */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: first ranks get one extra block row each */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;    /* same table in point (non-block) units */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];   /* prefix sum into offsets */
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; padding rows get length 1
     (they will hold a single unit diagonal entry) */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices; on one process the
       padding entries are not on disk, so read fewer and synthesize them */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }   /* diagonal entry of each padding row */

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks the block columns seen so far in the current block row,
     masked1/masked2 remember which marks to clear afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
  ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;   /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* square matrix: a block column is "diagonal" iff we own that block row */
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix, fully preallocated from the counts just computed */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
    /* read in my part of the matrix numerical values; padding rows get 1.0 on
       the diagonal so the matrix stays nonsingular */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = 
fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  /* guard clause: this setting only exists on the parallel BAIJ implementation */
  if (mat->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  /* record the factor; it is consulted when the assembly hash table is sized */
  baij          = (Mat_MPIBAIJ *) mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}