#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.173 1999/06/15 15:31:01 balay Exp balay $";
#endif

/* Parallel (MPI) block AIJ matrix: each process stores a "diagonal"
   SeqBAIJ block (A, columns owned locally) and an "off-diagonal"
   SeqBAIJ block (B, all other columns). */
#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
#include "src/vec/vecimpl.h"

/* Routines implemented elsewhere in the BAIJ source tree */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/* Saves a copy of the numerical values of both local blocks so they can
   later be restored with MatRetrieveValues(). */
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/* Restores the numerical values previously saved with MatStoreValues(). */
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* 
     Local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non-scalable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* Hash-table variant: maps garray[i]+1 -> i*bs+1 (shifted by one so that
     a lookup result of 0 can mean "not present") */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one int per global block column (not scalable) */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE  10

/* Insert one point entry (row,col,value) into the local diagonal block "a";
   indices are in the unblocked (point) numbering local to this process.
   When the target block row is full it is enlarged by CHUNKSIZE block
   entries (single malloc holding a, j and i arrays back to back).
   NOTE(review): a->nz is incremented only on the reallocation path, not on
   every new insertion - verify the nz accounting is intended. */
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap = ap + bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value; \
          else                    *bap  = value; \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j = (int *) (new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                           aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr); \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \
      rp[_i]                      = bcol; \
      ap[bs2*_i + bs*cidx + ridx] = value; \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

/* Same insertion logic as MatSetValues_SeqBAIJ_A_Private() but targeting
   the off-diagonal block "b" (arrays bi/bj/ba/bimax/bilen). */
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap = ap + bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value; \
          else                    *bap  = value; \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j = (int *) (new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                           ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr); \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \
      rp[_i]                      = bcol; \
      ap[bs2*_i + bs*cidx + ridx] = value; \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIBAIJ"
/* Point-wise (unblocked) MatSetValues: locally owned entries go straight
   into the A (diagonal) or B (off-diagonal) block via the macros above;
   rows owned by other processes are stashed for assembly-time
   communication.  im/in are global point indices; v is m x n, row- or
   column-oriented according to baij->roworiented. */
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  /* scratch used inside the insertion macros */
  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative rows are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {   /* locally owned row */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){   /* diagonal (A) block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {   /* off-diagonal (B) block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* entry falls in a block column not previously present:
                 disassemble and insert using global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {   /* off-process row: stash for assembly-time communication */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/* Block-wise MatSetValues: im/in are global BLOCK row/column indices and v
   holds m*n dense bs x bs blocks (orientation per baij->roworiented).
   Blocks are copied into the contiguous scratch buffer baij->barray when
   the input layout is not already contiguous.
   NOTE(review): unlike the other routines in this file this one has no
   PetscFunctionBegin - confirm that is intentional. */
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  if(!barray) {   /* lazily allocate the one-block scratch buffer */
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if((data - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Multiplicative hash using the golden ratio; tmp must be a double lvalue */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/* Point-wise MatSetValues via the precomputed hash table (fast path used
   once the nonzero pattern is fixed); only entries already present in the
   table may be set - a miss after a full linear-probe wrap is an error. */
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap once */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/* Block-wise MatSetValues via the hash table; im/in are BLOCK indices.
   Values are written directly through the cached block pointers HD[]. */
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;   /* only consumed in the row-oriented branch below */
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {   /* transpose: row-major input into column-major block */
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIBAIJ"
/* Retrieves entries; only locally owned rows are supported.  Requested
   entries outside the local nonzero pattern of B come back as 0.0. */
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap miss or stale entry => value not stored locally */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIBAIJ"
/* Matrix norm; in parallel only NORM_FROBENIUS is implemented (local sums
   of squares combined with MPI_Allreduce). */
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table 
  This table is created only once. 
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__  
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
/* Builds an open-addressed (linear-probing) hash table over every block
   currently stored in A and B.  Key = global block row * Nbs + global
   block col + 1 (the +1 keeps 0 free to mean "empty slot"); the parallel
   HD array caches a pointer to each block's values.  Table size is
   factor * (local nonzero blocks).  No-op if the table already exists. */
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table (HD and HT share one allocation) */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);


  /* Loop Over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        /* NOTE(review): int slot compared against 0.0 - works, but the
           literal 0 is what is meant */
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];   /* translate local B column to global */
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g) 783 if (k> max) max = k; 784 #endif 785 } 786 } 787 788 /* Print Summary */ 789 #if defined(PETSC_USE_BOPT_g) 790 for ( i=0,j=0; i<size; i++) 791 if (HT[i]) {j++;} 792 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 793 (j== 0)? 0.0:((double)(ct+j))/j,max); 794 #endif 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 803 int ierr,nstash,reallocs; 804 InsertMode addv; 805 806 PetscFunctionBegin; 807 if (baij->donotstash) { 808 PetscFunctionReturn(0); 809 } 810 811 /* make sure all processors are either in INSERTMODE or ADDMODE */ 812 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 813 if (addv == (ADD_VALUES|INSERT_VALUES)) { 814 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 815 } 816 mat->insertmode = addv; /* in case this processor had no cache */ 817 818 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 819 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 823 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNC__ 828 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 829 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 830 { 831 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 832 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 833 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 834 int 
*row,*col,other_disassembled,r1,r2,r3; 835 Scalar *val; 836 InsertMode addv = mat->insertmode; 837 838 PetscFunctionBegin; 839 if (!baij->donotstash) { 840 while (1) { 841 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 842 if (!flg) break; 843 844 for ( i=0; i<n; ) { 845 /* Now identify the consecutive vals belonging to the same row */ 846 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 847 if (j < n) ncols = j-i; 848 else ncols = n-i; 849 /* Now assemble all these values with a single function call */ 850 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 851 i = j; 852 } 853 } 854 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 855 /* Now process the block-stash. Since the values are stashed column-oriented, 856 set the roworiented flag to column oriented, and after MatSetValues() 857 restore the original flags */ 858 r1 = baij->roworiented; 859 r2 = a->roworiented; 860 r3 = b->roworiented; 861 baij->roworiented = 0; 862 a->roworiented = 0; 863 b->roworiented = 0; 864 while (1) { 865 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 866 if (!flg) break; 867 868 for ( i=0; i<n; ) { 869 /* Now identify the consecutive vals belonging to the same row */ 870 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 871 if (j < n) ncols = j-i; 872 else ncols = n-i; 873 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 874 i = j; 875 } 876 } 877 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 878 baij->roworiented = r1; 879 a->roworiented = r2; 880 b->roworiented = r3; 881 } 882 883 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 885 886 /* determine if any processor has disassembled, if so we must 887 also disassemble ourselfs, in order that we may reassemble. 
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once finally assembled with the hash-table option, switch the
     set-values entry points to the fast hash-table routines */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__  
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__  
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/* Viewer support: the info formats are printed directly; otherwise the
   whole matrix is gathered onto process 0 and viewed there. */
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* one rank prints at a time */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. 
*/ 989 Mat A; 990 Mat_SeqBAIJ *Aloc; 991 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 992 int mbs = baij->mbs; 993 Scalar *a; 994 995 if (!rank) { 996 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 997 } else { 998 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 999 } 1000 PLogObjectParent(mat,A); 1001 1002 /* copy over the A part */ 1003 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1004 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1005 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1006 1007 for ( i=0; i<mbs; i++ ) { 1008 rvals[0] = bs*(baij->rstart + i); 1009 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1010 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1011 col = (baij->cstart+aj[j])*bs; 1012 for (k=0; k<bs; k++ ) { 1013 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1014 col++; a += bs; 1015 } 1016 } 1017 } 1018 /* copy over the B part */ 1019 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1020 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1021 for ( i=0; i<mbs; i++ ) { 1022 rvals[0] = bs*(baij->rstart + i); 1023 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1024 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1025 col = baij->garray[aj[j]]*bs; 1026 for (k=0; k<bs; k++ ) { 1027 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1028 col++; a += bs; 1029 } 1030 } 1031 } 1032 ierr = PetscFree(rvals);CHKERRQ(ierr); 1033 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1034 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1035 /* 1036 Everyone has to call to draw the matrix since the graphics waits are 1037 synchronized across all processors that share the Draw object 1038 */ 1039 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1040 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1041 } 1042 ierr = MatDestroy(A);CHKERRQ(ierr); 1043 } 1044 PetscFunctionReturn(0); 1045 } 1046 1047 1048 1049 #undef 
__FUNC__ 1050 #define __FUNC__ "MatView_MPIBAIJ" 1051 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1052 { 1053 int ierr; 1054 ViewerType vtype; 1055 1056 PetscFunctionBegin; 1057 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1058 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1059 PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) { 1060 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1061 /* 1062 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1063 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1064 */ 1065 } else { 1066 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNC__ 1072 #define __FUNC__ "MatDestroy_MPIBAIJ" 1073 int MatDestroy_MPIBAIJ(Mat mat) 1074 { 1075 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1076 int ierr; 1077 1078 PetscFunctionBegin; 1079 if (--mat->refct > 0) PetscFunctionReturn(0); 1080 1081 if (mat->mapping) { 1082 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1083 } 1084 if (mat->bmapping) { 1085 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1086 } 1087 if (mat->rmap) { 1088 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1089 } 1090 if (mat->cmap) { 1091 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1092 } 1093 #if defined(PETSC_USE_LOG) 1094 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1095 #endif 1096 1097 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1098 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1099 1100 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1101 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1102 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1103 #if defined (PETSC_USE_CTABLE) 1104 if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);} 1105 #else 1106 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1107 #endif 1108 if (baij->garray) {ierr = 
PetscFree(baij->garray);CHKERRQ(ierr);} 1109 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1110 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1111 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1112 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1113 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1114 ierr = PetscFree(baij);CHKERRQ(ierr); 1115 PLogObjectDestroy(mat); 1116 PetscHeaderDestroy(mat); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNC__ 1121 #define __FUNC__ "MatMult_MPIBAIJ" 1122 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1123 { 1124 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1125 int ierr, nt; 1126 1127 PetscFunctionBegin; 1128 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1129 if (nt != a->n) { 1130 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1131 } 1132 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1133 if (nt != a->m) { 1134 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1135 } 1136 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1137 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1138 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1139 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1140 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNC__ 1145 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1146 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1147 { 1148 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1149 int ierr; 1150 1151 PetscFunctionBegin; 1152 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1153 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1154 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1155 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1161 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1162 { 1163 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1164 int ierr; 1165 1166 PetscFunctionBegin; 1167 /* do nondiagonal part */ 1168 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1169 /* send it on its way */ 1170 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1171 /* do local part */ 1172 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1173 /* receive remote parts: note this assumes the values are not actually */ 1174 /* inserted in yy until the next line, which is true for my implementation*/ 1175 /* but is not perhaps always true. */ 1176 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNC__ 1181 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1182 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1183 { 1184 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1185 int ierr; 1186 1187 PetscFunctionBegin; 1188 /* do nondiagonal part */ 1189 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1190 /* send it on its way */ 1191 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1192 /* do local part */ 1193 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1194 /* receive remote parts: note this assumes the values are not actually */ 1195 /* inserted in yy until the next line, which is true for my implementation*/ 1196 /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* Scale both the diagonal and off-diagonal sub-matrices by *aa. */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* Global dimensions; either output pointer may be PETSC_NULL. */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* Ownership range in point (not block) rows: rstart/rend are in blocks, scale by bs. */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one (local) point row, merging the entries of the diagonal (A) and
   off-diagonal (B) sub-matrices into a single column-sorted row.  Scratch
   arrays rowvalues/rowindices are allocated lazily, sized for the longest row.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and (after them) the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  /* NOTE(review): no trailing ';' after SETERRQ here in the original; the macro
     expands to a block so this compiles, but confirm it is intended */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global block column < cstart come before the A part */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so the split point must be found here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a =
(Mat_MPIBAIJ *) matin->data; 1389 Mat A = a->A, B = a->B; 1390 int ierr; 1391 double isend[5], irecv[5]; 1392 1393 PetscFunctionBegin; 1394 info->block_size = (double)a->bs; 1395 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1396 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1397 isend[3] = info->memory; isend[4] = info->mallocs; 1398 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1399 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1400 isend[3] += info->memory; isend[4] += info->mallocs; 1401 if (flag == MAT_LOCAL) { 1402 info->nz_used = isend[0]; 1403 info->nz_allocated = isend[1]; 1404 info->nz_unneeded = isend[2]; 1405 info->memory = isend[3]; 1406 info->mallocs = isend[4]; 1407 } else if (flag == MAT_GLOBAL_MAX) { 1408 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1409 info->nz_used = irecv[0]; 1410 info->nz_allocated = irecv[1]; 1411 info->nz_unneeded = irecv[2]; 1412 info->memory = irecv[3]; 1413 info->mallocs = irecv[4]; 1414 } else if (flag == MAT_GLOBAL_SUM) { 1415 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1416 info->nz_used = irecv[0]; 1417 info->nz_allocated = irecv[1]; 1418 info->nz_unneeded = irecv[2]; 1419 info->memory = irecv[3]; 1420 info->mallocs = irecv[4]; 1421 } else { 1422 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1423 } 1424 info->rows_global = (double)a->M; 1425 info->columns_global = (double)a->N; 1426 info->rows_local = (double)a->m; 1427 info->columns_local = (double)a->N; 1428 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1429 info->fill_ratio_needed = 0; 1430 info->factor_mallocs = 0; 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNC__ 1435 #define __FUNC__ "MatSetOption_MPIBAIJ" 1436 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1437 { 1438 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1439 int ierr; 1440 1441 PetscFunctionBegin; 1442 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1443 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1444 op == MAT_COLUMNS_UNSORTED || 1445 op == MAT_COLUMNS_SORTED || 1446 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1447 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1448 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1449 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1450 } else if (op == MAT_ROW_ORIENTED) { 1451 a->roworiented = 1; 1452 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1453 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1454 } else if (op == MAT_ROWS_SORTED || 1455 op == MAT_ROWS_UNSORTED || 1456 op == MAT_SYMMETRIC || 1457 op == MAT_STRUCTURALLY_SYMMETRIC || 1458 op == MAT_YES_NEW_DIAGONALS || 1459 op == MAT_USE_HASH_TABLE) { 1460 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1461 } else if (op == MAT_COLUMN_ORIENTED) { 1462 a->roworiented = 0; 1463 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1464 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1465 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1466 a->donotstash = 1; 1467 } else if (op == MAT_NO_NEW_DIAGONALS) { 1468 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1469 } else if (op == MAT_USE_HASH_TABLE) { 1470 a->ht_flag = 1; 1471 } else { 1472 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1473 } 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNC__ 1478 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1479 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1480 { 1481 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1482 Mat_SeqBAIJ *Aloc; 1483 Mat B; 1484 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1485 int bs=baij->bs,mbs=baij->mbs; 1486 Scalar *a; 1487 1488 PetscFunctionBegin; 1489 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1490 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1491 CHKERRQ(ierr); 1492 1493 /* copy over the A part */ 1494 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1495 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1496 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    /* rvals holds the bs consecutive global point-row indices of block row i */
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        /* insert rows of A as columns of B: one bs-long strip per call */
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      /* garray maps local off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
    ierr = MatDestroy(baij->A);CHKERRQ(ierr);
    ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
#else
    if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
    if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
    if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
    if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(baij);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/* Scales rows by ll (applied to both sub-matrices); right scaling unsupported. */
int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
{
  Mat a = ((Mat_MPIBAIJ *) A->data)->A;
  Mat b = ((Mat_MPIBAIJ *) A->data)->B;
  int ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
    ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr);
    ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr);
  }
  if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   Zeros the (global point) rows listed in is, optionally setting the diagonal
   entry to *diag.  Rows owned by other processes are forwarded to their owners
   with a send/receive rendezvous; each process then zeros its local rows.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  /* owners[] is in block rows; scale by bs to locate the owner of a point row */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices by destination; this pass advances starts[] ... */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* ... so it is rebuilt before posting the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local point rows */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;   /* global point row */
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* Prints runtime option help once per program run (rank 0 only for the Seq part). */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* Clears the factored state; only the diagonal block carries factorization state here. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   Compares A and B part-wise (diagonal and off-diagonal sub-matrices), then
   reduces with MPI_LAND so every process returns the global answer.
*/
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIBAIJ matrix type; zero entries are unimplemented ops. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns a reference (not a copy: *iscopy is PETSC_FALSE) to the diagonal sub-matrix. */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1895 . d_nz - number of block nonzeros per block row in diagonal portion of local 1896 submatrix (same for all local rows) 1897 . d_nnz - array containing the number of block nonzeros in the various block rows 1898 of the in diagonal portion of the local (possibly different for each block 1899 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1900 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1901 submatrix (same for all local rows). 1902 - o_nnz - array containing the number of nonzeros in the various block rows of the 1903 off-diagonal portion of the local submatrix (possibly different for 1904 each block row) or PETSC_NULL. 1905 1906 Output Parameter: 1907 . A - the matrix 1908 1909 Options Database Keys: 1910 . -mat_no_unroll - uses code that does not unroll the loops in the 1911 block calculations (much slower) 1912 . -mat_block_size - size of the blocks to use 1913 . -mat_mpi - use the parallel matrix data structures even on one processor 1914 (defaults to using SeqBAIJ format on one processor) 1915 1916 Notes: 1917 The user MUST specify either the local or global matrix dimensions 1918 (possibly both). 1919 1920 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1921 than it must be used on all processors that share the object for that argument. 1922 1923 Storage Information: 1924 For a square global matrix we define each processor's diagonal portion 1925 to be its local rows and the corresponding columns (a square submatrix); 1926 each processor's off-diagonal portion encompasses the remainder of the 1927 local matrix (a rectangular submatrix). 1928 1929 The user can specify preallocated storage for the diagonal part of 1930 the local submatrix with either d_nz or d_nnz (not both). 
Set 1931 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1932 memory allocation. Likewise, specify preallocated storage for the 1933 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1934 1935 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1936 the figure below we depict these three local rows and all columns (0-11). 1937 1938 .vb 1939 0 1 2 3 4 5 6 7 8 9 10 11 1940 ------------------- 1941 row 3 | o o o d d d o o o o o o 1942 row 4 | o o o d d d o o o o o o 1943 row 5 | o o o d d d o o o o o o 1944 ------------------- 1945 .ve 1946 1947 Thus, any entries in the d locations are stored in the d (diagonal) 1948 submatrix, and any entries in the o locations are stored in the 1949 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1950 stored simply in the MATSEQBAIJ format for compressed row storage. 1951 1952 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1953 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1954 In general, for PDE problems in which most nonzeros are near the diagonal, 1955 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1956 or you will get TERRIBLE performance; see the users' manual chapter on 1957 matrices. 
1958 1959 Level: intermediate 1960 1961 .keywords: matrix, block, aij, compressed row, sparse, parallel 1962 1963 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1964 @*/ 1965 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1966 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1967 { 1968 Mat B; 1969 Mat_MPIBAIJ *b; 1970 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1971 int flag1 = 0,flag2 = 0; 1972 1973 PetscFunctionBegin; 1974 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1975 1976 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1977 1978 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1979 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 1980 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1981 if (!flag1 && !flag2 && size == 1) { 1982 if (M == PETSC_DECIDE) M = m; 1983 if (N == PETSC_DECIDE) N = n; 1984 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1985 PetscFunctionReturn(0); 1986 } 1987 1988 *A = 0; 1989 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 1990 PLogObjectCreate(B); 1991 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 1992 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1993 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1994 1995 B->ops->destroy = MatDestroy_MPIBAIJ; 1996 B->ops->view = MatView_MPIBAIJ; 1997 B->mapping = 0; 1998 B->factor = 0; 1999 B->assembled = PETSC_FALSE; 2000 2001 B->insertmode = NOT_SET_VALUES; 2002 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2003 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2004 2005 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2006 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2007 } 2008 if ( M == 
PETSC_DECIDE && m == PETSC_DECIDE) { 2009 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2010 } 2011 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2012 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2013 } 2014 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2015 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2016 2017 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2018 work[0] = m; work[1] = n; 2019 mbs = m/bs; nbs = n/bs; 2020 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2021 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2022 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2023 } 2024 if (m == PETSC_DECIDE) { 2025 Mbs = M/bs; 2026 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2027 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2028 m = mbs*bs; 2029 } 2030 if (n == PETSC_DECIDE) { 2031 Nbs = N/bs; 2032 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2033 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2034 n = nbs*bs; 2035 } 2036 if (mbs*bs != m || nbs*bs != n) { 2037 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2038 } 2039 2040 b->m = m; B->m = m; 2041 b->n = n; B->n = n; 2042 b->N = N; B->N = N; 2043 b->M = M; B->M = M; 2044 b->bs = bs; 2045 b->bs2 = bs*bs; 2046 b->mbs = mbs; 2047 b->nbs = nbs; 2048 b->Mbs = Mbs; 2049 b->Nbs = Nbs; 2050 2051 /* the information in the maps duplicates the information computed below, eventually 2052 we should remove the duplicate information that is not contained in the maps */ 2053 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2054 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2055 2056 /* build local table of row and column ownerships */ 2057 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2058 
PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2059 b->cowners = b->rowners + b->size + 2; 2060 b->rowners_bs = b->cowners + b->size + 2; 2061 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2062 b->rowners[0] = 0; 2063 for ( i=2; i<=b->size; i++ ) { 2064 b->rowners[i] += b->rowners[i-1]; 2065 } 2066 for ( i=0; i<=b->size; i++ ) { 2067 b->rowners_bs[i] = b->rowners[i]*bs; 2068 } 2069 b->rstart = b->rowners[b->rank]; 2070 b->rend = b->rowners[b->rank+1]; 2071 b->rstart_bs = b->rstart * bs; 2072 b->rend_bs = b->rend * bs; 2073 2074 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2075 b->cowners[0] = 0; 2076 for ( i=2; i<=b->size; i++ ) { 2077 b->cowners[i] += b->cowners[i-1]; 2078 } 2079 b->cstart = b->cowners[b->rank]; 2080 b->cend = b->cowners[b->rank+1]; 2081 b->cstart_bs = b->cstart * bs; 2082 b->cend_bs = b->cend * bs; 2083 2084 2085 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2086 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2087 PLogObjectParent(B,b->A); 2088 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2089 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2090 PLogObjectParent(B,b->B); 2091 2092 /* build cache for off array entries formed */ 2093 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2094 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2095 b->donotstash = 0; 2096 b->colmap = 0; 2097 b->garray = 0; 2098 b->roworiented = 1; 2099 2100 /* stuff used in block assembly */ 2101 b->barray = 0; 2102 2103 /* stuff used for matrix vector multiply */ 2104 b->lvec = 0; 2105 b->Mvctx = 0; 2106 2107 /* stuff for MatGetRow() */ 2108 b->rowindices = 0; 2109 b->rowvalues = 0; 2110 b->getrowactive = PETSC_FALSE; 2111 2112 /* hash table stuff */ 2113 b->ht = 0; 2114 b->hd = 0; 2115 b->ht_size = 0; 2116 b->ht_flag = 0; 2117 b->ht_fact = 0; 2118 b->ht_total_ct = 0; 2119 
b->ht_insert_ct = 0; 2120 2121 *A = B; 2122 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2123 if (flg) { 2124 double fact = 1.39; 2125 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2126 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2127 if (fact <= 1.0) fact = 1.39; 2128 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2129 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2130 } 2131 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2132 "MatStoreValues_MPIBAIJ", 2133 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2135 "MatRetrieveValues_MPIBAIJ", 2136 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2137 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2138 "MatGetDiagonalBlock_MPIBAIJ", 2139 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 #undef __FUNC__ 2144 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2145 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2146 { 2147 Mat mat; 2148 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2149 int ierr, len=0, flg; 2150 2151 PetscFunctionBegin; 2152 *newmat = 0; 2153 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2154 PLogObjectCreate(mat); 2155 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2156 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2157 mat->ops->destroy = MatDestroy_MPIBAIJ; 2158 mat->ops->view = MatView_MPIBAIJ; 2159 mat->factor = matin->factor; 2160 mat->assembled = PETSC_TRUE; 2161 mat->insertmode = NOT_SET_VALUES; 2162 2163 a->m = mat->m = oldmat->m; 2164 a->n = mat->n = oldmat->n; 2165 a->M = mat->M = oldmat->M; 2166 a->N = mat->N = oldmat->N; 2167 2168 a->bs = oldmat->bs; 2169 a->bs2 = 
oldmat->bs2; 2170 a->mbs = oldmat->mbs; 2171 a->nbs = oldmat->nbs; 2172 a->Mbs = oldmat->Mbs; 2173 a->Nbs = oldmat->Nbs; 2174 2175 a->rstart = oldmat->rstart; 2176 a->rend = oldmat->rend; 2177 a->cstart = oldmat->cstart; 2178 a->cend = oldmat->cend; 2179 a->size = oldmat->size; 2180 a->rank = oldmat->rank; 2181 a->donotstash = oldmat->donotstash; 2182 a->roworiented = oldmat->roworiented; 2183 a->rowindices = 0; 2184 a->rowvalues = 0; 2185 a->getrowactive = PETSC_FALSE; 2186 a->barray = 0; 2187 a->rstart_bs = oldmat->rstart_bs; 2188 a->rend_bs = oldmat->rend_bs; 2189 a->cstart_bs = oldmat->cstart_bs; 2190 a->cend_bs = oldmat->cend_bs; 2191 2192 /* hash table stuff */ 2193 a->ht = 0; 2194 a->hd = 0; 2195 a->ht_size = 0; 2196 a->ht_flag = oldmat->ht_flag; 2197 a->ht_fact = oldmat->ht_fact; 2198 a->ht_total_ct = 0; 2199 a->ht_insert_ct = 0; 2200 2201 2202 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2203 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2204 a->cowners = a->rowners + a->size + 2; 2205 a->rowners_bs = a->cowners + a->size + 2; 2206 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2207 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2208 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2209 if (oldmat->colmap) { 2210 #if defined (PETSC_USE_CTABLE) 2211 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2212 #else 2213 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2214 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2215 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2216 #endif 2217 } else a->colmap = 0; 2218 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2219 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2220 PLogObjectMemory(mat,len*sizeof(int)); 2221 ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2222 } else a->garray = 0; 2223 2224 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2225 PLogObjectParent(mat,a->lvec); 2226 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2227 2228 PLogObjectParent(mat,a->Mvctx); 2229 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2230 PLogObjectParent(mat,a->A); 2231 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2232 PLogObjectParent(mat,a->B); 2233 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2234 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2235 if (flg) { 2236 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2237 } 2238 *newmat = mat; 2239 PetscFunctionReturn(0); 2240 } 2241 2242 #include "sys.h" 2243 2244 #undef __FUNC__ 2245 #define __FUNC__ "MatLoad_MPIBAIJ" 2246 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2247 { 2248 Mat A; 2249 int i, nz, ierr, j,rstart, rend, fd; 2250 Scalar *vals,*buf; 2251 MPI_Comm comm = ((PetscObject)viewer)->comm; 2252 MPI_Status status; 2253 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2254 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2255 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2256 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2257 int dcount,kmax,k,nzcount,tmp; 2258 2259 PetscFunctionBegin; 2260 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2261 2262 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2263 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2264 if (!rank) { 2265 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2266 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2267 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2268 if (header[3] < 0) { 2269 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as 
MPIBAIJ"); 2270 } 2271 } 2272 2273 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2274 M = header[1]; N = header[2]; 2275 2276 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2277 2278 /* 2279 This code adds extra rows to make sure the number of rows is 2280 divisible by the blocksize 2281 */ 2282 Mbs = M/bs; 2283 extra_rows = bs - M + bs*(Mbs); 2284 if (extra_rows == bs) extra_rows = 0; 2285 else Mbs++; 2286 if (extra_rows &&!rank) { 2287 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2288 } 2289 2290 /* determine ownership of all rows */ 2291 mbs = Mbs/size + ((Mbs % size) > rank); 2292 m = mbs * bs; 2293 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2294 browners = rowners + size + 1; 2295 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2296 rowners[0] = 0; 2297 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2298 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2299 rstart = rowners[rank]; 2300 rend = rowners[rank+1]; 2301 2302 /* distribute row lengths to all processors */ 2303 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2304 if (!rank) { 2305 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 2306 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2307 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2308 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2309 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2310 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2311 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2312 } else { 2313 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2314 } 2315 2316 if (!rank) { 2317 /* calculate the number of nonzeros on each processor */ 2318 procsnz = (int*) 
PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2319 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2320 for ( i=0; i<size; i++ ) { 2321 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2322 procsnz[i] += rowlengths[j]; 2323 } 2324 } 2325 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2326 2327 /* determine max buffer needed and allocate it */ 2328 maxnz = 0; 2329 for ( i=0; i<size; i++ ) { 2330 maxnz = PetscMax(maxnz,procsnz[i]); 2331 } 2332 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2333 2334 /* read in my part of the matrix column indices */ 2335 nz = procsnz[0]; 2336 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2337 mycols = ibuf; 2338 if (size == 1) nz -= extra_rows; 2339 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2340 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2341 2342 /* read in every ones (except the last) and ship off */ 2343 for ( i=1; i<size-1; i++ ) { 2344 nz = procsnz[i]; 2345 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2346 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2347 } 2348 /* read in the stuff for the last proc */ 2349 if ( size != 1 ) { 2350 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2351 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2352 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2353 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2354 } 2355 ierr = PetscFree(cols);CHKERRQ(ierr); 2356 } else { 2357 /* determine buffer space needed for message */ 2358 nz = 0; 2359 for ( i=0; i<m; i++ ) { 2360 nz += locrowlens[i]; 2361 } 2362 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2363 mycols = ibuf; 2364 /* receive message of column indices*/ 2365 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2366 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2367 if (maxnz != nz) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2368 } 2369 2370 /* loop over local rows, determining number of off diagonal entries */ 2371 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2372 odlens = dlens + (rend-rstart); 2373 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2374 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2375 masked1 = mask + Mbs; 2376 masked2 = masked1 + Mbs; 2377 rowcount = 0; nzcount = 0; 2378 for ( i=0; i<mbs; i++ ) { 2379 dcount = 0; 2380 odcount = 0; 2381 for ( j=0; j<bs; j++ ) { 2382 kmax = locrowlens[rowcount]; 2383 for ( k=0; k<kmax; k++ ) { 2384 tmp = mycols[nzcount++]/bs; 2385 if (!mask[tmp]) { 2386 mask[tmp] = 1; 2387 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2388 else masked1[dcount++] = tmp; 2389 } 2390 } 2391 rowcount++; 2392 } 2393 2394 dlens[i] = dcount; 2395 odlens[i] = odcount; 2396 2397 /* zero out the mask elements we set */ 2398 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2399 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2400 } 2401 2402 /* create our matrix */ 2403 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2404 A = *newmat; 2405 MatSetOption(A,MAT_COLUMNS_SORTED); 2406 2407 if (!rank) { 2408 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2409 /* read in my part of the matrix numerical values */ 2410 nz = procsnz[0]; 2411 vals = buf; 2412 mycols = ibuf; 2413 if (size == 1) nz -= extra_rows; 2414 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2415 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2416 2417 /* insert into matrix */ 2418 jj = rstart*bs; 2419 for ( i=0; i<m; i++ ) { 2420 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2421 mycols += locrowlens[i]; 2422 vals += locrowlens[i]; 2423 jj++; 2424 } 2425 /* read in other processors (except the last one) 
and ship out */ 2426 for ( i=1; i<size-1; i++ ) { 2427 nz = procsnz[i]; 2428 vals = buf; 2429 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2430 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2431 } 2432 /* the last proc */ 2433 if ( size != 1 ){ 2434 nz = procsnz[i] - extra_rows; 2435 vals = buf; 2436 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2437 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2438 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2439 } 2440 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2441 } else { 2442 /* receive numeric values */ 2443 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2444 2445 /* receive message of values*/ 2446 vals = buf; 2447 mycols = ibuf; 2448 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2449 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2450 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2451 2452 /* insert into matrix */ 2453 jj = rstart*bs; 2454 for ( i=0; i<m; i++ ) { 2455 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2456 mycols += locrowlens[i]; 2457 vals += locrowlens[i]; 2458 jj++; 2459 } 2460 } 2461 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2462 ierr = PetscFree(buf);CHKERRQ(ierr); 2463 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2464 ierr = PetscFree(rowners);CHKERRQ(ierr); 2465 ierr = PetscFree(dlens);CHKERRQ(ierr); 2466 ierr = PetscFree(mask);CHKERRQ(ierr); 2467 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2468 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 2473 2474 #undef __FUNC__ 2475 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2476 /*@ 2477 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2478 2479 Input Parameters: 2480 . mat - the matrix 2481 . 
fact - factor 2482 2483 Collective on Mat 2484 2485 Level: advanced 2486 2487 Notes: 2488 This can also be set by the command line option: -mat_use_hash_table fact 2489 2490 .keywords: matrix, hashtable, factor, HT 2491 2492 .seealso: MatSetOption() 2493 @*/ 2494 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2495 { 2496 Mat_MPIBAIJ *baij; 2497 2498 PetscFunctionBegin; 2499 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2500 if (mat->type != MATMPIBAIJ) { 2501 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2502 } 2503 baij = (Mat_MPIBAIJ*) mat->data; 2504 baij->ht_fact = fact; 2505 PetscFunctionReturn(0); 2506 } 2507