#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.174 1999/06/30 23:51:57 balay Exp bsmith $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

/* Routines implemented in other compilation units of the BAIJ code */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   Saves a copy of the current numerical values of the matrix by
   delegating to the sequential MatStoreValues() on both the diagonal
   (A) and off-diagonal (B) local blocks.
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   Restores the numerical values previously saved with
   MatStoreValues_MPIBAIJ(), again on both local blocks.
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*) baij->B->data;  /* off-diagonal local block */
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* sparse table keyed by global block column + 1; value is local offset + 1
     (the +1 shifts avoid the ambiguity of 0 meaning "absent")
     NOTE(review): the size hint uses baij->nbs; verify the member name --
     elsewhere this struct is accessed as Nbs (global block count) */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length Nbs (global # of block columns): entry is 0 when the
     block column is not present locally, otherwise local offset*bs + 1 */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block entries allocated per row when a row overflows */
#define CHUNKSIZE 10

/*
   Inserts a single scalar (row,col are POINT indices local to this process)
   into the diagonal block a (Mat_SeqBAIJ).  Performs a short binary search
   followed by a linear scan within the block row; if the block column is
   absent and insertion is allowed (a->nonew), the row storage is enlarged
   by CHUNKSIZE block entries and later entries are shifted up.
   Expects the caller to provide the working variables used below
   (rp, ap, brow, bcol, low, high, ierr, ... -- see MatSetValues_MPIBAIJ).
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* column-major layout inside each bs x bs block */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: values, column indices and row pointers in one chunk */ \
      len = \
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr);\
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      /* NOTE(review): a->nz is bumped only on the reallocation path here, \
         not on every insertion of a new block -- confirm this matches how \
         nz is maintained by MatSetValues_SeqBAIJ */ \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Identical in structure to MatSetValues_SeqBAIJ_A_Private() above, but
   operates on the OFF-DIAGONAL local block b (rp/ap etc. are rebound to
   bj/ba, and the row-length arrays are bimax/bilen).
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search narrows to a window of <= 3, then linear scan */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Point-wise MatSetValues for the parallel BAIJ format.  im/in are GLOBAL
   point (not block) indices.  Locally owned entries go straight into the
   diagonal (A) or off-diagonal (B) sequential block via the macros above;
   entries owned by other processes are stashed for communication during
   assembly.  Negative row/column indices are silently skipped.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  /* scratch variables written by the insertion macros */
  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned locally */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* off-diagonal column: translate global -> local via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-process-coupling entry: revert to unassembled state */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   Block-wise MatSetValues: im/in are GLOBAL BLOCK indices and v holds
   m*n dense bs x bs blocks.  Locally owned blocks are forwarded to the
   sequential blocked insertion routines; remote blocks go to the block
   stash (mat->bstash).
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int
*im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;    /* bs2-sized staging buffer, lazily allocated */
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  /* NOTE(review): no PetscFunctionBegin here, although PetscFunctionReturn
     is used at the end -- confirm whether the begin/return pair must match */
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  /* distance (in Scalars) from the end of one block row of v to the next */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required: gather the (i,j) block into barray */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal block column */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              /* sanity check: colmap values must be block aligned */
              ierr =
TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if((data - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block not in the current nonzero pattern: disassemble and retry with global index */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* remote block row: stash the whole block for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hashing with the golden-ratio constant; tmp must be a double lvalue */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table based point-wise MatSetValues, installed once the matrix has
   been assembled and the hash table built (see MatAssemblyEnd_MPIBAIJ).
   Only existing nonzero locations can be addressed; a lookup miss is an
   error.  Remote rows are still stashed as in the non-HT path.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0)
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the (block row, block col) pair */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probing with wrap-around; counts probes for diagnostics */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Hash-table based blocked MatSetValues: im/in are GLOBAL BLOCK indices,
   v holds m*n dense bs x bs blocks.  Like MatSetValues_MPIBAIJ_HT(), only
   existing blocks can be updated.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int
ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  /* stride between successive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probing with wrap-around */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];   /* start of the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* input is row-oriented, storage is column-major within the block,
             hence the strided jj loop */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs;
ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          /* column-oriented input matches the block storage layout directly */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      /* remote block row: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves point entries (GLOBAL point indices) into v, row-major.
   Only locally owned rows may be queried; entries outside the local
   nonzero pattern read back as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: map global column to local via colmap */
          if (!baij->colmap) {
            ierr =
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 or a garray mismatch means the block is absent locally */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   Computes a matrix norm.  Currently only NORM_FROBENIUS is supported in
   parallel: each process sums the squares of its local (A and B) entries
   and the partial sums are combined with MPI_Allreduce.  On a single
   process the request is forwarded to the sequential MatNorm().
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;   /* collision count / longest probe sequence */
#endif

  PetscFunctionBegin;
  /* table size = factor * total local nonzero blocks (factor > 1 gives slack) */
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    /* table already exists; it is built only once */
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: HD (value pointers) and HT (keys) share one allocation */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);


  /* Loop Over A: insert every diagonal-block nonzero, keyed by (global row, global col) */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;   /* +1 so 0 can mean "empty slot" */
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        /* NOTE(review): HT is int yet compared against 0.0 -- harmless
           (0 == 0.0) but presumably meant to be the integer 0 */
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: off-diagonal blocks use garray to recover global columns */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g) 783 if (k> max) max = k; 784 #endif 785 } 786 } 787 788 /* Print Summary */ 789 #if defined(PETSC_USE_BOPT_g) 790 for ( i=0,j=0; i<size; i++) 791 if (HT[i]) {j++;} 792 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 793 (j== 0)? 0.0:((double)(ct+j))/j,max); 794 #endif 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 803 int ierr,nstash,reallocs; 804 InsertMode addv; 805 806 PetscFunctionBegin; 807 if (baij->donotstash) { 808 PetscFunctionReturn(0); 809 } 810 811 /* make sure all processors are either in INSERTMODE or ADDMODE */ 812 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 813 if (addv == (ADD_VALUES|INSERT_VALUES)) { 814 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 815 } 816 mat->insertmode = addv; /* in case this processor had no cache */ 817 818 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 819 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 823 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNC__ 828 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 829 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 830 { 831 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 832 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 833 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 834 int 
*row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: messages arrive grouped arbitrarily, so batch
       consecutive entries that share a row into one MatSetValues call */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter context for MatMult etc. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once assembled, optionally switch the set-values kernels to the
     hash-table fast paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer back-end for ASCII, draw and socket viewers.  Info formats print
   per-process statistics; otherwise the entire matrix is gathered onto
   process 0 and viewed there (all processes participate for draw viewers).
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij
= (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* serialize output across ranks so lines are not interleaved */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor.
*/ 989 Mat A; 990 Mat_SeqBAIJ *Aloc; 991 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 992 int mbs = baij->mbs; 993 Scalar *a; 994 995 if (!rank) { 996 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 997 } else { 998 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 999 } 1000 PLogObjectParent(mat,A); 1001 1002 /* copy over the A part */ 1003 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1004 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1005 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1006 1007 for ( i=0; i<mbs; i++ ) { 1008 rvals[0] = bs*(baij->rstart + i); 1009 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1010 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1011 col = (baij->cstart+aj[j])*bs; 1012 for (k=0; k<bs; k++ ) { 1013 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1014 col++; a += bs; 1015 } 1016 } 1017 } 1018 /* copy over the B part */ 1019 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1020 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1021 for ( i=0; i<mbs; i++ ) { 1022 rvals[0] = bs*(baij->rstart + i); 1023 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1024 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1025 col = baij->garray[aj[j]]*bs; 1026 for (k=0; k<bs; k++ ) { 1027 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1028 col++; a += bs; 1029 } 1030 } 1031 } 1032 ierr = PetscFree(rvals);CHKERRQ(ierr); 1033 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1034 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1035 /* 1036 Everyone has to call to draw the matrix since the graphics waits are 1037 synchronized across all processors that share the Draw object 1038 */ 1039 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1040 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1041 } 1042 ierr = MatDestroy(A);CHKERRQ(ierr); 1043 } 1044 PetscFunctionReturn(0); 1045 } 1046 1047 1048 1049 #undef 
__FUNC__ 1050 #define __FUNC__ "MatView_MPIBAIJ" 1051 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1052 { 1053 int ierr; 1054 ViewerType vtype; 1055 1056 PetscFunctionBegin; 1057 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1058 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1059 PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) { 1060 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1061 /* 1062 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1063 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1064 */ 1065 } else { 1066 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNC__ 1072 #define __FUNC__ "MatDestroy_MPIBAIJ" 1073 int MatDestroy_MPIBAIJ(Mat mat) 1074 { 1075 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1076 int ierr; 1077 1078 PetscFunctionBegin; 1079 1080 if (mat->mapping) { 1081 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1082 } 1083 if (mat->bmapping) { 1084 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1085 } 1086 if (mat->rmap) { 1087 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1088 } 1089 if (mat->cmap) { 1090 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1091 } 1092 #if defined(PETSC_USE_LOG) 1093 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1094 #endif 1095 1096 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1097 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1098 1099 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1100 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1101 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1102 #if defined (PETSC_USE_CTABLE) 1103 if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);} 1104 #else 1105 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1106 #endif 1107 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1108 if 
(baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1109 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1110 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1111 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1112 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1113 ierr = PetscFree(baij);CHKERRQ(ierr); 1114 PLogObjectDestroy(mat); 1115 PetscHeaderDestroy(mat); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNC__ 1120 #define __FUNC__ "MatMult_MPIBAIJ" 1121 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1122 { 1123 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1124 int ierr, nt; 1125 1126 PetscFunctionBegin; 1127 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1128 if (nt != a->n) { 1129 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1130 } 1131 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1132 if (nt != a->m) { 1133 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1134 } 1135 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1136 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1137 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1138 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1139 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNC__ 1144 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1145 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1146 { 1147 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1148 int ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1152 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1153 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1154 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIBAIJ"
/*
   MatMultTrans_MPIBAIJ - Transpose matrix-vector product yy = A^T*xx.
   The off-diagonal contribution is computed into lvec first and scattered
   back (reverse, additive) while the local transpose product proceeds.
*/
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   MatMultTransAdd_MPIBAIJ - Computes zz = yy + A^T*xx, same communication
   pattern as MatMultTrans_MPIBAIJ.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* MatScale_MPIBAIJ - Scales both local submatrices by *aa */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* MatGetSize_MPIBAIJ - Returns global dimensions; either pointer may be null */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
/* MatGetLocalSize_MPIBAIJ - Returns this process's local row/column counts */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* MatGetOwnershipRange_MPIBAIJ - Returns the owned global row range in
   point (not block) indices: [rstart*bs, rend*bs) */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat
matin,int row,int *nz,int **idx,Scalar **v) 1267 { 1268 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1269 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1270 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1271 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1272 int *cmap, *idx_p,cstart = mat->cstart; 1273 1274 PetscFunctionBegin; 1275 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1276 mat->getrowactive = PETSC_TRUE; 1277 1278 if (!mat->rowvalues && (idx || v)) { 1279 /* 1280 allocate enough space to hold information from the longest row. 1281 */ 1282 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1283 int max = 1,mbs = mat->mbs,tmp; 1284 for ( i=0; i<mbs; i++ ) { 1285 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1286 if (max < tmp) { max = tmp; } 1287 } 1288 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1289 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1290 } 1291 1292 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1293 lrow = row - brstart; 1294 1295 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1296 if (!v) {pvA = 0; pvB = 0;} 1297 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1298 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1299 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1300 nztot = nzA + nzB; 1301 1302 cmap = mat->garray; 1303 if (v || idx) { 1304 if (nztot) { 1305 /* Sort by increasing column numbers, assuming A and B already sorted */ 1306 int imark = -1; 1307 if (v) { 1308 *v = v_p = mat->rowvalues; 1309 for ( i=0; i<nzB; i++ ) { 1310 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1311 else break; 1312 } 1313 imark = i; 1314 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1315 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1316 } 1317 if (idx) { 
1318 *idx = idx_p = mat->rowindices; 1319 if (imark > -1) { 1320 for ( i=0; i<imark; i++ ) { 1321 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1322 } 1323 } else { 1324 for ( i=0; i<nzB; i++ ) { 1325 if (cmap[cworkB[i]/bs] < cstart) 1326 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1327 else break; 1328 } 1329 imark = i; 1330 } 1331 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1332 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1333 } 1334 } else { 1335 if (idx) *idx = 0; 1336 if (v) *v = 0; 1337 } 1338 } 1339 *nz = nztot; 1340 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1341 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNC__ 1346 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1347 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1348 { 1349 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1350 1351 PetscFunctionBegin; 1352 if (baij->getrowactive == PETSC_FALSE) { 1353 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1354 } 1355 baij->getrowactive = PETSC_FALSE; 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNC__ 1360 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1361 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1362 { 1363 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1364 1365 PetscFunctionBegin; 1366 *bs = baij->bs; 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNC__ 1371 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1372 int MatZeroEntries_MPIBAIJ(Mat A) 1373 { 1374 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1375 int ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1379 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNC__ 1384 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1385 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1386 { 1387 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *) matin->data; 1388 Mat A = a->A, B = a->B; 1389 int ierr; 1390 double isend[5], irecv[5]; 1391 1392 PetscFunctionBegin; 1393 info->block_size = (double)a->bs; 1394 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1395 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1396 isend[3] = info->memory; isend[4] = info->mallocs; 1397 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1398 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1399 isend[3] += info->memory; isend[4] += info->mallocs; 1400 if (flag == MAT_LOCAL) { 1401 info->nz_used = isend[0]; 1402 info->nz_allocated = isend[1]; 1403 info->nz_unneeded = isend[2]; 1404 info->memory = isend[3]; 1405 info->mallocs = isend[4]; 1406 } else if (flag == MAT_GLOBAL_MAX) { 1407 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1408 info->nz_used = irecv[0]; 1409 info->nz_allocated = irecv[1]; 1410 info->nz_unneeded = irecv[2]; 1411 info->memory = irecv[3]; 1412 info->mallocs = irecv[4]; 1413 } else if (flag == MAT_GLOBAL_SUM) { 1414 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1415 info->nz_used = irecv[0]; 1416 info->nz_allocated = irecv[1]; 1417 info->nz_unneeded = irecv[2]; 1418 info->memory = irecv[3]; 1419 info->mallocs = irecv[4]; 1420 } else { 1421 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1422 } 1423 info->rows_global = (double)a->M; 1424 info->columns_global = (double)a->N; 1425 info->rows_local = (double)a->m; 1426 info->columns_local = (double)a->N; 1427 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1428 info->fill_ratio_needed = 0; 1429 info->factor_mallocs = 0; 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatSetOption_MPIBAIJ" 1435 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1436 { 1437 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1442 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1443 op == MAT_COLUMNS_UNSORTED || 1444 op == MAT_COLUMNS_SORTED || 1445 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1446 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1447 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1448 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1449 } else if (op == MAT_ROW_ORIENTED) { 1450 a->roworiented = 1; 1451 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1452 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1453 } else if (op == MAT_ROWS_SORTED || 1454 op == MAT_ROWS_UNSORTED || 1455 op == MAT_SYMMETRIC || 1456 op == MAT_STRUCTURALLY_SYMMETRIC || 1457 op == MAT_YES_NEW_DIAGONALS || 1458 op == MAT_USE_HASH_TABLE) { 1459 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1460 } else if (op == MAT_COLUMN_ORIENTED) { 1461 a->roworiented = 0; 1462 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1463 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1464 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1465 a->donotstash = 1; 1466 } else if (op == MAT_NO_NEW_DIAGONALS) { 1467 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1468 } else if (op == MAT_USE_HASH_TABLE) { 1469 a->ht_flag = 1; 1470 } else { 1471 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNC__ 1477 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1478 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1479 { 1480 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1481 Mat_SeqBAIJ *Aloc; 1482 Mat B; 1483 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1484 int bs=baij->bs,mbs=baij->mbs; 1485 Scalar *a; 1486 1487 PetscFunctionBegin; 1488 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1489 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1490 CHKERRQ(ierr); 1491 1492 /* copy over the A part */ 1493 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1494 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1495 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1496 1497 for ( i=0; i<mbs; i++ ) { 1498 rvals[0] = bs*(baij->rstart + i); 1499 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1500 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1501 col = (baij->cstart+aj[j])*bs; 1502 for (k=0; k<bs; k++ ) { 1503 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1504 col++; a += bs; 1505 } 1506 } 1507 } 1508 /* copy over the B part */ 1509 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1510 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1511 for ( i=0; i<mbs; i++ ) { 1512 rvals[0] = bs*(baij->rstart + i); 1513 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1514 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1515 col = baij->garray[aj[j]]*bs; 1516 for (k=0; k<bs; k++ ) { 1517 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1518 col++; a += bs; 1519 } 1520 } 1521 } 1522 ierr = PetscFree(rvals);CHKERRQ(ierr); 1523 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1524 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1525 1526 if (matout != PETSC_NULL) { 1527 *matout = B; 1528 } else { 1529 PetscOps *Abops; 1530 MatOps Aops; 1531 1532 /* This isn't really an in-place transpose .... but free data structures from baij */ 1533 ierr = PetscFree(baij->rowners); CHKERRQ(ierr); 1534 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1535 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1536 #if defined (PETSC_USE_CTABLE) 1537 if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);} 1538 #else 1539 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1540 #endif 1541 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1542 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1543 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1544 ierr = PetscFree(baij);CHKERRQ(ierr); 1545 1546 /* 1547 This is horrible, horrible code. 
We need to keep the 1548 A pointers for the bops and ops but copy everything 1549 else from C. 1550 */ 1551 Abops = A->bops; 1552 Aops = A->ops; 1553 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1554 A->bops = Abops; 1555 A->ops = Aops; 1556 1557 PetscHeaderDestroy(B); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNC__ 1563 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1564 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1565 { 1566 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1567 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1568 int ierr,s1,s2,s3; 1569 1570 PetscFunctionBegin; 1571 if (ll) { 1572 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1573 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 1574 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1575 ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr); 1576 ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr); 1577 } 1578 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNC__ 1583 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1584 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1585 { 1586 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1587 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1588 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1589 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1590 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1591 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1592 MPI_Comm comm = A->comm; 1593 MPI_Request *send_waits,*recv_waits; 1594 MPI_Status recv_status,*send_status; 1595 IS istmp; 1596 1597 PetscFunctionBegin; 1598 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1599 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1600 1601 /* first count number of contributors to each processor */ 1602 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 1603 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1604 procs = nprocs + size; 1605 
  owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  /* locate the owning process of every requested row (indices are in
     point numbering, hence the *bs on the block-row ownership ranges) */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination process */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* starts[] was advanced by the bucketing pass above; rebuild it */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first owned point row of this process, for global -> local conversion */
  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A  because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* slow path: set each diagonal entry individually with MatSetValues() */
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* MatPrintHelp_MPIBAIJ - Prints the runtime options for this format once
   per program run (rank 0 also prints the SeqBAIJ options) */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* MatSetUnfactored_MPIBAIJ - Resets the factored state of the diagonal part */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   MatEqual_MPIBAIJ - Compares the local A and B parts of the two matrices,
   then takes a logical AND over the communicator so all processes return
   the same answer.
*/
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MATMPIBAIJ matrix type; zero entries are
   operations not supported by this format */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* MatGetDiagonalBlock_MPIBAIJ - Returns (without copying) the sequential
   diagonal submatrix A of the parallel matrix */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1894 . d_nz - number of block nonzeros per block row in diagonal portion of local 1895 submatrix (same for all local rows) 1896 . d_nnz - array containing the number of block nonzeros in the various block rows 1897 of the in diagonal portion of the local (possibly different for each block 1898 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1899 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1900 submatrix (same for all local rows). 1901 - o_nnz - array containing the number of nonzeros in the various block rows of the 1902 off-diagonal portion of the local submatrix (possibly different for 1903 each block row) or PETSC_NULL. 1904 1905 Output Parameter: 1906 . A - the matrix 1907 1908 Options Database Keys: 1909 . -mat_no_unroll - uses code that does not unroll the loops in the 1910 block calculations (much slower) 1911 . -mat_block_size - size of the blocks to use 1912 . -mat_mpi - use the parallel matrix data structures even on one processor 1913 (defaults to using SeqBAIJ format on one processor) 1914 1915 Notes: 1916 The user MUST specify either the local or global matrix dimensions 1917 (possibly both). 1918 1919 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1920 than it must be used on all processors that share the object for that argument. 1921 1922 Storage Information: 1923 For a square global matrix we define each processor's diagonal portion 1924 to be its local rows and the corresponding columns (a square submatrix); 1925 each processor's off-diagonal portion encompasses the remainder of the 1926 local matrix (a rectangular submatrix). 1927 1928 The user can specify preallocated storage for the diagonal part of 1929 the local submatrix with either d_nz or d_nnz (not both). 
Set 1930 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1931 memory allocation. Likewise, specify preallocated storage for the 1932 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1933 1934 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1935 the figure below we depict these three local rows and all columns (0-11). 1936 1937 .vb 1938 0 1 2 3 4 5 6 7 8 9 10 11 1939 ------------------- 1940 row 3 | o o o d d d o o o o o o 1941 row 4 | o o o d d d o o o o o o 1942 row 5 | o o o d d d o o o o o o 1943 ------------------- 1944 .ve 1945 1946 Thus, any entries in the d locations are stored in the d (diagonal) 1947 submatrix, and any entries in the o locations are stored in the 1948 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1949 stored simply in the MATSEQBAIJ format for compressed row storage. 1950 1951 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1952 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1953 In general, for PDE problems in which most nonzeros are near the diagonal, 1954 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1955 or you will get TERRIBLE performance; see the users' manual chapter on 1956 matrices. 
1957 1958 Level: intermediate 1959 1960 .keywords: matrix, block, aij, compressed row, sparse, parallel 1961 1962 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1963 @*/ 1964 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1965 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1966 { 1967 Mat B; 1968 Mat_MPIBAIJ *b; 1969 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1970 int flag1 = 0,flag2 = 0; 1971 1972 PetscFunctionBegin; 1973 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1974 1975 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1976 1977 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1978 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 1979 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1980 if (!flag1 && !flag2 && size == 1) { 1981 if (M == PETSC_DECIDE) M = m; 1982 if (N == PETSC_DECIDE) N = n; 1983 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1984 PetscFunctionReturn(0); 1985 } 1986 1987 *A = 0; 1988 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1989 PLogObjectCreate(B); 1990 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 1991 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1992 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1993 1994 B->ops->destroy = MatDestroy_MPIBAIJ; 1995 B->ops->view = MatView_MPIBAIJ; 1996 B->mapping = 0; 1997 B->factor = 0; 1998 B->assembled = PETSC_FALSE; 1999 2000 B->insertmode = NOT_SET_VALUES; 2001 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2002 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2003 2004 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2005 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2006 } 2007 if ( M == 
PETSC_DECIDE && m == PETSC_DECIDE) { 2008 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2009 } 2010 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2011 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2012 } 2013 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2014 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2015 2016 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2017 work[0] = m; work[1] = n; 2018 mbs = m/bs; nbs = n/bs; 2019 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2020 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2021 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2022 } 2023 if (m == PETSC_DECIDE) { 2024 Mbs = M/bs; 2025 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2026 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2027 m = mbs*bs; 2028 } 2029 if (n == PETSC_DECIDE) { 2030 Nbs = N/bs; 2031 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2032 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2033 n = nbs*bs; 2034 } 2035 if (mbs*bs != m || nbs*bs != n) { 2036 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2037 } 2038 2039 b->m = m; B->m = m; 2040 b->n = n; B->n = n; 2041 b->N = N; B->N = N; 2042 b->M = M; B->M = M; 2043 b->bs = bs; 2044 b->bs2 = bs*bs; 2045 b->mbs = mbs; 2046 b->nbs = nbs; 2047 b->Mbs = Mbs; 2048 b->Nbs = Nbs; 2049 2050 /* the information in the maps duplicates the information computed below, eventually 2051 we should remove the duplicate information that is not contained in the maps */ 2052 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2053 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2054 2055 /* build local table of row and column ownerships */ 2056 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2057 
PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2058 b->cowners = b->rowners + b->size + 2; 2059 b->rowners_bs = b->cowners + b->size + 2; 2060 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2061 b->rowners[0] = 0; 2062 for ( i=2; i<=b->size; i++ ) { 2063 b->rowners[i] += b->rowners[i-1]; 2064 } 2065 for ( i=0; i<=b->size; i++ ) { 2066 b->rowners_bs[i] = b->rowners[i]*bs; 2067 } 2068 b->rstart = b->rowners[b->rank]; 2069 b->rend = b->rowners[b->rank+1]; 2070 b->rstart_bs = b->rstart * bs; 2071 b->rend_bs = b->rend * bs; 2072 2073 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2074 b->cowners[0] = 0; 2075 for ( i=2; i<=b->size; i++ ) { 2076 b->cowners[i] += b->cowners[i-1]; 2077 } 2078 b->cstart = b->cowners[b->rank]; 2079 b->cend = b->cowners[b->rank+1]; 2080 b->cstart_bs = b->cstart * bs; 2081 b->cend_bs = b->cend * bs; 2082 2083 2084 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2085 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2086 PLogObjectParent(B,b->A); 2087 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2088 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2089 PLogObjectParent(B,b->B); 2090 2091 /* build cache for off array entries formed */ 2092 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2093 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2094 b->donotstash = 0; 2095 b->colmap = 0; 2096 b->garray = 0; 2097 b->roworiented = 1; 2098 2099 /* stuff used in block assembly */ 2100 b->barray = 0; 2101 2102 /* stuff used for matrix vector multiply */ 2103 b->lvec = 0; 2104 b->Mvctx = 0; 2105 2106 /* stuff for MatGetRow() */ 2107 b->rowindices = 0; 2108 b->rowvalues = 0; 2109 b->getrowactive = PETSC_FALSE; 2110 2111 /* hash table stuff */ 2112 b->ht = 0; 2113 b->hd = 0; 2114 b->ht_size = 0; 2115 b->ht_flag = 0; 2116 b->ht_fact = 0; 2117 b->ht_total_ct = 0; 2118 
b->ht_insert_ct = 0; 2119 2120 *A = B; 2121 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2122 if (flg) { 2123 double fact = 1.39; 2124 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2125 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2126 if (fact <= 1.0) fact = 1.39; 2127 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2128 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2129 } 2130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2131 "MatStoreValues_MPIBAIJ", 2132 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2133 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2134 "MatRetrieveValues_MPIBAIJ", 2135 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2136 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2137 "MatGetDiagonalBlock_MPIBAIJ", 2138 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNC__ 2143 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2144 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2145 { 2146 Mat mat; 2147 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2148 int ierr, len=0, flg; 2149 2150 PetscFunctionBegin; 2151 *newmat = 0; 2152 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2153 PLogObjectCreate(mat); 2154 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2155 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2156 mat->ops->destroy = MatDestroy_MPIBAIJ; 2157 mat->ops->view = MatView_MPIBAIJ; 2158 mat->factor = matin->factor; 2159 mat->assembled = PETSC_TRUE; 2160 mat->insertmode = NOT_SET_VALUES; 2161 2162 a->m = mat->m = oldmat->m; 2163 a->n = mat->n = oldmat->n; 2164 a->M = mat->M = oldmat->M; 2165 a->N = mat->N = oldmat->N; 2166 2167 a->bs = oldmat->bs; 2168 a->bs2 = 
oldmat->bs2; 2169 a->mbs = oldmat->mbs; 2170 a->nbs = oldmat->nbs; 2171 a->Mbs = oldmat->Mbs; 2172 a->Nbs = oldmat->Nbs; 2173 2174 a->rstart = oldmat->rstart; 2175 a->rend = oldmat->rend; 2176 a->cstart = oldmat->cstart; 2177 a->cend = oldmat->cend; 2178 a->size = oldmat->size; 2179 a->rank = oldmat->rank; 2180 a->donotstash = oldmat->donotstash; 2181 a->roworiented = oldmat->roworiented; 2182 a->rowindices = 0; 2183 a->rowvalues = 0; 2184 a->getrowactive = PETSC_FALSE; 2185 a->barray = 0; 2186 a->rstart_bs = oldmat->rstart_bs; 2187 a->rend_bs = oldmat->rend_bs; 2188 a->cstart_bs = oldmat->cstart_bs; 2189 a->cend_bs = oldmat->cend_bs; 2190 2191 /* hash table stuff */ 2192 a->ht = 0; 2193 a->hd = 0; 2194 a->ht_size = 0; 2195 a->ht_flag = oldmat->ht_flag; 2196 a->ht_fact = oldmat->ht_fact; 2197 a->ht_total_ct = 0; 2198 a->ht_insert_ct = 0; 2199 2200 2201 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2202 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2203 a->cowners = a->rowners + a->size + 2; 2204 a->rowners_bs = a->cowners + a->size + 2; 2205 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2206 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2207 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2208 if (oldmat->colmap) { 2209 #if defined (PETSC_USE_CTABLE) 2210 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2211 #else 2212 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2213 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2214 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2215 #endif 2216 } else a->colmap = 0; 2217 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2218 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2219 PLogObjectMemory(mat,len*sizeof(int)); 2220 ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2221 } else a->garray = 0; 2222 2223 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2224 PLogObjectParent(mat,a->lvec); 2225 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2226 2227 PLogObjectParent(mat,a->Mvctx); 2228 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2229 PLogObjectParent(mat,a->A); 2230 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2231 PLogObjectParent(mat,a->B); 2232 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2233 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2234 if (flg) { 2235 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2236 } 2237 *newmat = mat; 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #include "sys.h" 2242 2243 #undef __FUNC__ 2244 #define __FUNC__ "MatLoad_MPIBAIJ" 2245 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2246 { 2247 Mat A; 2248 int i, nz, ierr, j,rstart, rend, fd; 2249 Scalar *vals,*buf; 2250 MPI_Comm comm = ((PetscObject)viewer)->comm; 2251 MPI_Status status; 2252 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2253 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2254 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2255 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2256 int dcount,kmax,k,nzcount,tmp; 2257 2258 PetscFunctionBegin; 2259 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2260 2261 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2262 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2263 if (!rank) { 2264 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2265 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2266 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2267 if (header[3] < 0) { 2268 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as 
MPIBAIJ"); 2269 } 2270 } 2271 2272 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2273 M = header[1]; N = header[2]; 2274 2275 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2276 2277 /* 2278 This code adds extra rows to make sure the number of rows is 2279 divisible by the blocksize 2280 */ 2281 Mbs = M/bs; 2282 extra_rows = bs - M + bs*(Mbs); 2283 if (extra_rows == bs) extra_rows = 0; 2284 else Mbs++; 2285 if (extra_rows &&!rank) { 2286 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2287 } 2288 2289 /* determine ownership of all rows */ 2290 mbs = Mbs/size + ((Mbs % size) > rank); 2291 m = mbs * bs; 2292 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2293 browners = rowners + size + 1; 2294 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2295 rowners[0] = 0; 2296 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2297 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2298 rstart = rowners[rank]; 2299 rend = rowners[rank+1]; 2300 2301 /* distribute row lengths to all processors */ 2302 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2303 if (!rank) { 2304 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 2305 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2306 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2307 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2308 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2309 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2310 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2311 } else { 2312 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2313 } 2314 2315 if (!rank) { 2316 /* calculate the number of nonzeros on each processor */ 2317 procsnz = (int*) 
PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2318 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2319 for ( i=0; i<size; i++ ) { 2320 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2321 procsnz[i] += rowlengths[j]; 2322 } 2323 } 2324 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2325 2326 /* determine max buffer needed and allocate it */ 2327 maxnz = 0; 2328 for ( i=0; i<size; i++ ) { 2329 maxnz = PetscMax(maxnz,procsnz[i]); 2330 } 2331 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2332 2333 /* read in my part of the matrix column indices */ 2334 nz = procsnz[0]; 2335 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2336 mycols = ibuf; 2337 if (size == 1) nz -= extra_rows; 2338 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2339 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2340 2341 /* read in every ones (except the last) and ship off */ 2342 for ( i=1; i<size-1; i++ ) { 2343 nz = procsnz[i]; 2344 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2345 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2346 } 2347 /* read in the stuff for the last proc */ 2348 if ( size != 1 ) { 2349 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2350 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2351 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2352 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2353 } 2354 ierr = PetscFree(cols);CHKERRQ(ierr); 2355 } else { 2356 /* determine buffer space needed for message */ 2357 nz = 0; 2358 for ( i=0; i<m; i++ ) { 2359 nz += locrowlens[i]; 2360 } 2361 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2362 mycols = ibuf; 2363 /* receive message of column indices*/ 2364 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2365 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2366 if (maxnz != nz) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2367 } 2368 2369 /* loop over local rows, determining number of off diagonal entries */ 2370 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2371 odlens = dlens + (rend-rstart); 2372 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2373 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2374 masked1 = mask + Mbs; 2375 masked2 = masked1 + Mbs; 2376 rowcount = 0; nzcount = 0; 2377 for ( i=0; i<mbs; i++ ) { 2378 dcount = 0; 2379 odcount = 0; 2380 for ( j=0; j<bs; j++ ) { 2381 kmax = locrowlens[rowcount]; 2382 for ( k=0; k<kmax; k++ ) { 2383 tmp = mycols[nzcount++]/bs; 2384 if (!mask[tmp]) { 2385 mask[tmp] = 1; 2386 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2387 else masked1[dcount++] = tmp; 2388 } 2389 } 2390 rowcount++; 2391 } 2392 2393 dlens[i] = dcount; 2394 odlens[i] = odcount; 2395 2396 /* zero out the mask elements we set */ 2397 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2398 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2399 } 2400 2401 /* create our matrix */ 2402 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2403 A = *newmat; 2404 MatSetOption(A,MAT_COLUMNS_SORTED); 2405 2406 if (!rank) { 2407 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2408 /* read in my part of the matrix numerical values */ 2409 nz = procsnz[0]; 2410 vals = buf; 2411 mycols = ibuf; 2412 if (size == 1) nz -= extra_rows; 2413 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2414 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2415 2416 /* insert into matrix */ 2417 jj = rstart*bs; 2418 for ( i=0; i<m; i++ ) { 2419 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 mycols += locrowlens[i]; 2421 vals += locrowlens[i]; 2422 jj++; 2423 } 2424 /* read in other processors (except the last one) 
and ship out */ 2425 for ( i=1; i<size-1; i++ ) { 2426 nz = procsnz[i]; 2427 vals = buf; 2428 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2429 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2430 } 2431 /* the last proc */ 2432 if ( size != 1 ){ 2433 nz = procsnz[i] - extra_rows; 2434 vals = buf; 2435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2436 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2437 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2438 } 2439 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2440 } else { 2441 /* receive numeric values */ 2442 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2443 2444 /* receive message of values*/ 2445 vals = buf; 2446 mycols = ibuf; 2447 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2448 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2449 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2450 2451 /* insert into matrix */ 2452 jj = rstart*bs; 2453 for ( i=0; i<m; i++ ) { 2454 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2455 mycols += locrowlens[i]; 2456 vals += locrowlens[i]; 2457 jj++; 2458 } 2459 } 2460 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2461 ierr = PetscFree(buf);CHKERRQ(ierr); 2462 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2463 ierr = PetscFree(rowners);CHKERRQ(ierr); 2464 ierr = PetscFree(dlens);CHKERRQ(ierr); 2465 ierr = PetscFree(mask);CHKERRQ(ierr); 2466 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2467 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 2472 2473 #undef __FUNC__ 2474 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2475 /*@ 2476 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2477 2478 Input Parameters: 2479 . mat - the matrix 2480 . 
fact - factor 2481 2482 Collective on Mat 2483 2484 Level: advanced 2485 2486 Notes: 2487 This can also be set by the command line option: -mat_use_hash_table fact 2488 2489 .keywords: matrix, hashtable, factor, HT 2490 2491 .seealso: MatSetOption() 2492 @*/ 2493 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2494 { 2495 Mat_MPIBAIJ *baij; 2496 2497 PetscFunctionBegin; 2498 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2499 if (mat->type != MATMPIBAIJ) { 2500 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2501 } 2502 baij = (Mat_MPIBAIJ*) mat->data; 2503 baij->ht_fact = fact; 2504 PetscFunctionReturn(0); 2505 } 2506