#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.180 1999/10/13 20:37:30 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

/* BAIJ support routines implemented in other compilation units */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   MatStoreValues_MPIBAIJ - Saves a copy of the current matrix values by
   delegating MatStoreValues() to both local blocks: the diagonal part (A)
   and the off-diagonal part (B) of the parallel matrix.
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   MatRetrieveValues_MPIBAIJ - Restores the values previously saved with
   MatStoreValues_MPIBAIJ() into both local blocks (A and B).
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
54 */ 55 #undef __FUNC__ 56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 57 static int CreateColmap_MPIBAIJ_Private(Mat mat) 58 { 59 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 61 int nbs = B->nbs,i,bs=B->bs,ierr; 62 63 PetscFunctionBegin; 64 #if defined (PETSC_USE_CTABLE) 65 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 66 for ( i=0; i<nbs; i++ ){ 67 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 68 } 69 #else 70 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 71 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 72 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 73 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 74 #endif 75 PetscFunctionReturn(0); 76 } 77 78 #define CHUNKSIZE 10 79 80 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 81 { \ 82 \ 83 brow = row/bs; \ 84 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 85 rmax = aimax[brow]; nrow = ailen[brow]; \ 86 bcol = col/bs; \ 87 ridx = row % bs; cidx = col % bs; \ 88 low = 0; high = nrow; \ 89 while (high-low > 3) { \ 90 t = (low+high)/2; \ 91 if (rp[t] > bcol) high = t; \ 92 else low = t; \ 93 } \ 94 for ( _i=low; _i<high; _i++ ) { \ 95 if (rp[_i] > bcol) break; \ 96 if (rp[_i] == bcol) { \ 97 bap = ap + bs2*_i + bs*cidx + ridx; \ 98 if (addv == ADD_VALUES) *bap += value; \ 99 else *bap = value; \ 100 goto a_noinsert; \ 101 } \ 102 } \ 103 if (a->nonew == 1) goto a_noinsert; \ 104 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 105 if (nrow >= rmax) { \ 106 /* there is no extra room in row, therefore enlarge */ \ 107 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 108 Scalar *new_a; \ 109 \ 110 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 111 \ 112 /* malloc new storage space */ \ 113 len = 
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 114 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 115 new_j = (int *) (new_a + bs2*new_nz); \ 116 new_i = new_j + new_nz; \ 117 \ 118 /* copy over old data into new slots */ \ 119 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 120 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 121 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 122 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 123 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 124 len*sizeof(int));CHKERRQ(ierr); \ 125 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 126 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 127 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 128 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 129 /* free up old matrix storage */ \ 130 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 131 if (!a->singlemalloc) { \ 132 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 133 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 134 } \ 135 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 136 a->singlemalloc = 1; \ 137 \ 138 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 139 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 140 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 141 a->maxnz += bs2*CHUNKSIZE; \ 142 a->reallocs++; \ 143 a->nz++; \ 144 } \ 145 N = nrow++ - 1; \ 146 /* shift up all the later entries in this row */ \ 147 for ( ii=N; ii>=_i; ii-- ) { \ 148 rp[ii+1] = rp[ii]; \ 149 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 150 } \ 151 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \ 152 rp[_i] = bcol; \ 153 ap[bs2*_i + bs*cidx + ridx] = value; \ 154 a_noinsert:; \ 155 ailen[brow] = nrow; \ 156 } 157 158 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 159 { 
\ 160 \ 161 brow = row/bs; \ 162 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 163 rmax = bimax[brow]; nrow = bilen[brow]; \ 164 bcol = col/bs; \ 165 ridx = row % bs; cidx = col % bs; \ 166 low = 0; high = nrow; \ 167 while (high-low > 3) { \ 168 t = (low+high)/2; \ 169 if (rp[t] > bcol) high = t; \ 170 else low = t; \ 171 } \ 172 for ( _i=low; _i<high; _i++ ) { \ 173 if (rp[_i] > bcol) break; \ 174 if (rp[_i] == bcol) { \ 175 bap = ap + bs2*_i + bs*cidx + ridx; \ 176 if (addv == ADD_VALUES) *bap += value; \ 177 else *bap = value; \ 178 goto b_noinsert; \ 179 } \ 180 } \ 181 if (b->nonew == 1) goto b_noinsert; \ 182 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 183 if (nrow >= rmax) { \ 184 /* there is no extra room in row, therefore enlarge */ \ 185 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 186 Scalar *new_a; \ 187 \ 188 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 189 \ 190 /* malloc new storage space */ \ 191 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 192 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 193 new_j = (int *) (new_a + bs2*new_nz); \ 194 new_i = new_j + new_nz; \ 195 \ 196 /* copy over old data into new slots */ \ 197 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 198 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 199 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 200 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 201 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 202 len*sizeof(int));CHKERRQ(ierr); \ 203 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 204 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 205 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 206 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 207 
/* free up old matrix storage */ \ 208 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 209 if (!b->singlemalloc) { \ 210 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 211 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 212 } \ 213 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 214 b->singlemalloc = 1; \ 215 \ 216 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 217 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 218 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 219 b->maxnz += bs2*CHUNKSIZE; \ 220 b->reallocs++; \ 221 b->nz++; \ 222 } \ 223 N = nrow++ - 1; \ 224 /* shift up all the later entries in this row */ \ 225 for ( ii=N; ii>=_i; ii-- ) { \ 226 rp[ii+1] = rp[ii]; \ 227 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 228 } \ 229 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 230 rp[_i] = bcol; \ 231 ap[bs2*_i + bs*cidx + ridx] = value; \ 232 b_noinsert:; \ 233 bilen[brow] = nrow; \ 234 } 235 236 #undef __FUNC__ 237 #define __FUNC__ "MatSetValues_MPIBAIJ" 238 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 239 { 240 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 241 Scalar value; 242 int ierr,i,j,row,col; 243 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 244 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 245 int cend_orig=baij->cend_bs,bs=baij->bs; 246 247 /* Some Variables required in the macro */ 248 Mat A = baij->A; 249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 250 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 251 Scalar *aa=a->a; 252 253 Mat B = baij->B; 254 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 255 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 256 Scalar *ba=b->a; 257 258 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 259 int low,high,t,ridx,cidx,bs2=a->bs2; 260 Scalar *ap,*bap; 261 262 PetscFunctionBegin; 263 for ( i=0; i<m; i++ ) { 264 if (im[i] < 0) continue; 265 #if defined(PETSC_USE_BOPT_g) 266 
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column lies in the off-diagonal block B: map the global column
             to the local compressed column number via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: fall back to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - Insertion of bs x bs blocks; (im,in) are
   block indices.  Each block is copied (if necessary) into the contiguous
   scratch buffer baij->barray before being handed to the sequential routine.
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    /* lazily allocate the bs2 scratch block, kept for the matrix lifetime */
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hash using the golden-ratio constant; tmp is a caller-supplied
   double scratch variable so the macro has no hidden declarations */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Point-wise insertion into an already-assembled
   matrix using the precomputed hash table (see
   MatCreateHashTable_MPIBAIJ_Private); only existing nonzero locations may
   be set.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0)
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the (block row, block col)
           pair, shifted by +1 so 0 marks an empty slot */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1 = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward, then wrap around from the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block insertion via the hash table; the
   located HD[idx] points at the bs2 values of an existing block, which are
   overwritten or accumulated in column-major order.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1 = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            /* transpose row-oriented input into the column-major block */
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves point-wise values; only locally owned
   rows are supported.  Off-diagonal columns not present in this process's
   B block are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr =
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block absent locally -> the entry is an (implicit) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes a matrix norm.  In parallel only
   NORM_FROBENIUS is supported: local sums of squares over A and B are
   combined with an MPI_Allreduce.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  /* table size = factor * (number of local nonzero blocks) */
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: keys and data pointers share one
     allocation, with the int keys stored after the Scalar* array */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);


  /* Loop Over A */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        /* NOTE(review): HT[] is int; comparing against 0.0 works but should
           read == 0 */
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];   /* map local off-diagonal block column to global */
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g) 783 if (k> max) max = k; 784 #endif 785 } 786 } 787 788 /* Print Summary */ 789 #if defined(PETSC_USE_BOPT_g) 790 for ( i=0,j=0; i<size; i++) 791 if (HT[i]) {j++;} 792 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 793 (j== 0)? 0.0:((double)(ct+j))/j,max); 794 #endif 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 803 int ierr,nstash,reallocs; 804 InsertMode addv; 805 806 PetscFunctionBegin; 807 if (baij->donotstash) { 808 PetscFunctionReturn(0); 809 } 810 811 /* make sure all processors are either in INSERTMODE or ADDMODE */ 812 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 813 if (addv == (ADD_VALUES|INSERT_VALUES)) { 814 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 815 } 816 mat->insertmode = addv; /* in case this processor had no cache */ 817 818 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 819 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 823 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNC__ 828 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 829 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 830 { 831 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 832 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 833 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 834 int 
*row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on final assembly, optionally build the hash table and switch the
     MatSetValues entry points to the fast hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewer back-end: prints info
   formats directly; otherwise gathers the whole matrix onto process 0 and
   views it there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij 
= (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  PetscTruth  isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* process 0 owns the entire gathered matrix; everyone else 0 rows */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqBAIJ*) baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}



#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
/*
   MatView_MPIBAIJ - Top-level viewer dispatch: accepts ASCII, draw, socket
   and binary viewers, all of which are routed to the common helper above.
*/
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/* NOTE(review): this function continues beyond the end of this chunk */
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = 
PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1102 #else 1103 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1104 #endif 1105 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1106 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1107 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1108 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1109 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1110 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1111 ierr = PetscFree(baij);CHKERRQ(ierr); 1112 PLogObjectDestroy(mat); 1113 PetscHeaderDestroy(mat); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNC__ 1118 #define __FUNC__ "MatMult_MPIBAIJ" 1119 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1120 { 1121 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1122 int ierr, nt; 1123 1124 PetscFunctionBegin; 1125 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1126 if (nt != a->n) { 1127 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1128 } 1129 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1130 if (nt != a->m) { 1131 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1132 } 1133 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1134 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1135 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1136 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1137 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNC__ 1142 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1143 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1144 { 1145 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1146 int ierr; 1147 1148 PetscFunctionBegin; 1149 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1150 
ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1151 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1152 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNC__ 1157 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1158 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1159 { 1160 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1161 int ierr; 1162 1163 PetscFunctionBegin; 1164 /* do nondiagonal part */ 1165 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1166 /* send it on its way */ 1167 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1168 /* do local part */ 1169 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1170 /* receive remote parts: note this assumes the values are not actually */ 1171 /* inserted in yy until the next line, which is true for my implementation*/ 1172 /* but is not perhaps always true. */ 1173 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNC__ 1178 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1179 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1180 { 1181 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1182 int ierr; 1183 1184 PetscFunctionBegin; 1185 /* do nondiagonal part */ 1186 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1187 /* send it on its way */ 1188 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1189 /* do local part */ 1190 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1191 /* receive remote parts: note this assumes the values are not actually */ 1192 /* inserted in yy until the next line, which is true for my implementation*/ 1193 /* but is not perhaps always true. 
*/ 1194 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 /* 1199 This only works correctly for square matrices where the subblock A->A is the 1200 diagonal block 1201 */ 1202 #undef __FUNC__ 1203 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1204 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1205 { 1206 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1207 int ierr; 1208 1209 PetscFunctionBegin; 1210 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1211 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNC__ 1216 #define __FUNC__ "MatScale_MPIBAIJ" 1217 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1218 { 1219 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1220 int ierr; 1221 1222 PetscFunctionBegin; 1223 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1224 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNC__ 1229 #define __FUNC__ "MatGetSize_MPIBAIJ" 1230 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1231 { 1232 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1233 1234 PetscFunctionBegin; 1235 if (m) *m = mat->M; 1236 if (n) *n = mat->N; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNC__ 1241 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1242 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1243 { 1244 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1245 1246 PetscFunctionBegin; 1247 *m = mat->m; *n = mat->n; 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNC__ 1252 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1253 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1254 { 1255 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1256 1257 PetscFunctionBegin; 1258 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNC__ 1263 #define __FUNC__ "MatGetRow_MPIBAIJ" 1264 int MatGetRow_MPIBAIJ(Mat 
matin,int row,int *nz,int **idx,Scalar **v) 1265 { 1266 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1267 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1268 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1269 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1270 int *cmap, *idx_p,cstart = mat->cstart; 1271 1272 PetscFunctionBegin; 1273 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1274 mat->getrowactive = PETSC_TRUE; 1275 1276 if (!mat->rowvalues && (idx || v)) { 1277 /* 1278 allocate enough space to hold information from the longest row. 1279 */ 1280 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1281 int max = 1,mbs = mat->mbs,tmp; 1282 for ( i=0; i<mbs; i++ ) { 1283 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1284 if (max < tmp) { max = tmp; } 1285 } 1286 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1287 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1288 } 1289 1290 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1291 lrow = row - brstart; 1292 1293 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1294 if (!v) {pvA = 0; pvB = 0;} 1295 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1296 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1297 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1298 nztot = nzA + nzB; 1299 1300 cmap = mat->garray; 1301 if (v || idx) { 1302 if (nztot) { 1303 /* Sort by increasing column numbers, assuming A and B already sorted */ 1304 int imark = -1; 1305 if (v) { 1306 *v = v_p = mat->rowvalues; 1307 for ( i=0; i<nzB; i++ ) { 1308 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1309 else break; 1310 } 1311 imark = i; 1312 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1313 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1314 } 1315 if (idx) { 
1316 *idx = idx_p = mat->rowindices; 1317 if (imark > -1) { 1318 for ( i=0; i<imark; i++ ) { 1319 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1320 } 1321 } else { 1322 for ( i=0; i<nzB; i++ ) { 1323 if (cmap[cworkB[i]/bs] < cstart) 1324 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1325 else break; 1326 } 1327 imark = i; 1328 } 1329 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1330 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1331 } 1332 } else { 1333 if (idx) *idx = 0; 1334 if (v) *v = 0; 1335 } 1336 } 1337 *nz = nztot; 1338 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1339 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNC__ 1344 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1345 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1346 { 1347 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1348 1349 PetscFunctionBegin; 1350 if (baij->getrowactive == PETSC_FALSE) { 1351 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1352 } 1353 baij->getrowactive = PETSC_FALSE; 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNC__ 1358 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1359 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1360 { 1361 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1362 1363 PetscFunctionBegin; 1364 *bs = baij->bs; 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNC__ 1369 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1370 int MatZeroEntries_MPIBAIJ(Mat A) 1371 { 1372 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1373 int ierr; 1374 1375 PetscFunctionBegin; 1376 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1377 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNC__ 1382 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1383 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1384 { 1385 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *) matin->data; 1386 Mat A = a->A, B = a->B; 1387 int ierr; 1388 double isend[5], irecv[5]; 1389 1390 PetscFunctionBegin; 1391 info->block_size = (double)a->bs; 1392 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1393 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1394 isend[3] = info->memory; isend[4] = info->mallocs; 1395 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1396 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1397 isend[3] += info->memory; isend[4] += info->mallocs; 1398 if (flag == MAT_LOCAL) { 1399 info->nz_used = isend[0]; 1400 info->nz_allocated = isend[1]; 1401 info->nz_unneeded = isend[2]; 1402 info->memory = isend[3]; 1403 info->mallocs = isend[4]; 1404 } else if (flag == MAT_GLOBAL_MAX) { 1405 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1406 info->nz_used = irecv[0]; 1407 info->nz_allocated = irecv[1]; 1408 info->nz_unneeded = irecv[2]; 1409 info->memory = irecv[3]; 1410 info->mallocs = irecv[4]; 1411 } else if (flag == MAT_GLOBAL_SUM) { 1412 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1413 info->nz_used = irecv[0]; 1414 info->nz_allocated = irecv[1]; 1415 info->nz_unneeded = irecv[2]; 1416 info->memory = irecv[3]; 1417 info->mallocs = irecv[4]; 1418 } else { 1419 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1420 } 1421 info->rows_global = (double)a->M; 1422 info->columns_global = (double)a->N; 1423 info->rows_local = (double)a->m; 1424 info->columns_local = (double)a->N; 1425 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1426 info->fill_ratio_needed = 0; 1427 info->factor_mallocs = 0; 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNC__ 1432 #define __FUNC__ "MatSetOption_MPIBAIJ" 1433 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1434 { 1435 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1436 int ierr; 1437 1438 PetscFunctionBegin; 1439 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1440 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1441 op == MAT_COLUMNS_UNSORTED || 1442 op == MAT_COLUMNS_SORTED || 1443 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1444 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1445 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1446 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1447 } else if (op == MAT_ROW_ORIENTED) { 1448 a->roworiented = 1; 1449 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1450 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1451 } else if (op == MAT_ROWS_SORTED || 1452 op == MAT_ROWS_UNSORTED || 1453 op == MAT_SYMMETRIC || 1454 op == MAT_STRUCTURALLY_SYMMETRIC || 1455 op == MAT_YES_NEW_DIAGONALS || 1456 op == MAT_USE_HASH_TABLE) { 1457 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1458 } else if (op == MAT_COLUMN_ORIENTED) { 1459 a->roworiented = 0; 1460 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1461 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1462 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1463 a->donotstash = 1; 1464 } else if (op == MAT_NO_NEW_DIAGONALS) { 1465 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1466 } else if (op == MAT_USE_HASH_TABLE) { 1467 a->ht_flag = 1; 1468 } else { 1469 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNC__ 1475 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1476 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1477 { 1478 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1479 Mat_SeqBAIJ *Aloc; 1480 Mat B; 1481 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1482 int bs=baij->bs,mbs=baij->mbs; 1483 Scalar *a; 1484 1485 PetscFunctionBegin; 1486 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1487 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1488 CHKERRQ(ierr); 1489 1490 /* copy over the A part */ 1491 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1492 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1493 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1494 1495 for ( i=0; i<mbs; i++ ) { 1496 rvals[0] = bs*(baij->rstart + i); 1497 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1498 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1499 col = (baij->cstart+aj[j])*bs; 1500 for (k=0; k<bs; k++ ) { 1501 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1502 col++; a += bs; 1503 } 1504 } 1505 } 1506 /* copy over the B part */ 1507 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1508 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1509 for ( i=0; i<mbs; i++ ) { 1510 rvals[0] = bs*(baij->rstart + i); 1511 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1512 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1513 col = baij->garray[aj[j]]*bs; 1514 for (k=0; k<bs; k++ ) { 1515 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1516 col++; a += bs; 1517 } 1518 } 1519 } 1520 ierr = PetscFree(rvals);CHKERRQ(ierr); 1521 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1522 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1523 1524 if (matout != PETSC_NULL) { 1525 *matout = B; 1526 } else { 1527 PetscOps *Abops; 1528 MatOps Aops; 1529 1530 /* This isn't really an in-place transpose .... but free data structures from baij */ 1531 ierr = PetscFree(baij->rowners); CHKERRQ(ierr); 1532 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1533 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1534 #if defined (PETSC_USE_CTABLE) 1535 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1536 #else 1537 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1538 #endif 1539 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1540 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1541 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1542 ierr = PetscFree(baij);CHKERRQ(ierr); 1543 1544 /* 1545 This is horrible, horrible code. 
We need to keep the 1546 A pointers for the bops and ops but copy everything 1547 else from C. 1548 */ 1549 Abops = A->bops; 1550 Aops = A->ops; 1551 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1552 A->bops = Abops; 1553 A->ops = Aops; 1554 1555 PetscHeaderDestroy(B); 1556 } 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNC__ 1561 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1562 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1563 { 1564 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1565 Mat a = baij->A, b = baij->B; 1566 int ierr,s1,s2,s3; 1567 1568 PetscFunctionBegin; 1569 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1570 if (rr) { 1571 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1572 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1573 /* Overlap communication with computation. */ 1574 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1575 } 1576 if (ll) { 1577 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1578 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1579 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1580 } 1581 /* scale the diagonal block */ 1582 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1583 1584 if (rr) { 1585 /* Do a scatter end and then right scale the off-diagonal block */ 1586 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1587 ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr); 1588 } 1589 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNC__ 1594 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1595 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1596 { 1597 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1598 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1599 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1600 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1601 int *rvalues,tag = 
A->tag,count,base,slen,n,*source; 1602 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1603 MPI_Comm comm = A->comm; 1604 MPI_Request *send_waits,*recv_waits; 1605 MPI_Status recv_status,*send_status; 1606 IS istmp; 1607 1608 PetscFunctionBegin; 1609 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1610 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1611 1612 /* first count number of contributors to each processor */ 1613 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 1614 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1615 procs = nprocs + size; 1616 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1617 for ( i=0; i<N; i++ ) { 1618 idx = rows[i]; 1619 found = 0; 1620 for ( j=0; j<size; j++ ) { 1621 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1622 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1623 } 1624 } 1625 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1626 } 1627 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1628 1629 /* inform other processors of number of messages and max length*/ 1630 work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work); 1631 ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1632 nmax = work[rank]; 1633 nrecvs = work[size+rank]; 1634 ierr = PetscFree(work);CHKERRQ(ierr); 1635 1636 /* post receives: */ 1637 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1638 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1639 for ( i=0; i<nrecvs; i++ ) { 1640 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1641 } 1642 1643 /* do sends: 1644 1) starts[i] gives the starting index in svalues for stuff going to 1645 the ith processor 1646 */ 1647 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 1648 send_waits = (MPI_Request *) PetscMalloc( 
(nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1649 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 1650 starts[0] = 0; 1651 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1652 for ( i=0; i<N; i++ ) { 1653 svalues[starts[owner[i]]++] = rows[i]; 1654 } 1655 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1656 1657 starts[0] = 0; 1658 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1659 count = 0; 1660 for ( i=0; i<size; i++ ) { 1661 if (procs[i]) { 1662 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1663 } 1664 } 1665 ierr = PetscFree(starts);CHKERRQ(ierr); 1666 1667 base = owners[rank]*bs; 1668 1669 /* wait on receives */ 1670 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 1671 source = lens + nrecvs; 1672 count = nrecvs; slen = 0; 1673 while (count) { 1674 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1675 /* unpack receives into our local space */ 1676 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1677 source[imdex] = recv_status.MPI_SOURCE; 1678 lens[imdex] = n; 1679 slen += n; 1680 count--; 1681 } 1682 ierr = PetscFree(recv_waits); CHKERRQ(ierr); 1683 1684 /* move the data into the send scatter */ 1685 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 1686 count = 0; 1687 for ( i=0; i<nrecvs; i++ ) { 1688 values = rvalues + i*nmax; 1689 for ( j=0; j<lens[i]; j++ ) { 1690 lrows[count++] = values[j] - base; 1691 } 1692 } 1693 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1694 ierr = PetscFree(lens);CHKERRQ(ierr); 1695 ierr = PetscFree(owner);CHKERRQ(ierr); 1696 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1697 1698 /* actually zap the local rows */ 1699 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1700 PLogObjectParent(A,istmp); 1701 1702 /* 1703 Zero the required rows. 
If the "diagonal block" of the matrix 1704 is square and the user wishes to set the diagonal we use seperate 1705 code so that MatSetValues() is not called for each diagonal allocating 1706 new memory, thus calling lots of mallocs and slowing things down. 1707 1708 Contributed by: Mathew Knepley 1709 */ 1710 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1711 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 1712 if (diag && (l->A->M == l->A->N)) { 1713 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 1714 } else if (diag) { 1715 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1716 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1717 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1718 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1719 } 1720 for ( i = 0; i < slen; i++ ) { 1721 row = lrows[i] + rstart_bs; 1722 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1723 } 1724 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1725 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1726 } else { 1727 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1728 } 1729 1730 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1731 ierr = PetscFree(lrows);CHKERRQ(ierr); 1732 1733 /* wait on sends */ 1734 if (nsends) { 1735 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1736 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1737 ierr = PetscFree(send_status);CHKERRQ(ierr); 1738 } 1739 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1740 ierr = PetscFree(svalues);CHKERRQ(ierr); 1741 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNC__ 1746 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1747 int MatPrintHelp_MPIBAIJ(Mat A) 1748 { 1749 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1750 MPI_Comm comm = A->comm; 1751 static int called = 0; 1752 int ierr; 1753 1754 
PetscFunctionBegin; 1755 if (!a->rank) { 1756 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1757 } 1758 if (called) {PetscFunctionReturn(0);} else called = 1; 1759 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1760 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNC__ 1765 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1766 int MatSetUnfactored_MPIBAIJ(Mat A) 1767 { 1768 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1769 int ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1777 1778 #undef __FUNC__ 1779 #define __FUNC__ "MatEqual_MPIBAIJ" 1780 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag) 1781 { 1782 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data; 1783 Mat a, b, c, d; 1784 PetscTruth flg; 1785 int ierr; 1786 1787 PetscFunctionBegin; 1788 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1789 a = matA->A; b = matA->B; 1790 c = matB->A; d = matB->B; 1791 1792 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1793 if (flg == PETSC_TRUE) { 1794 ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1795 } 1796 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 /* -------------------------------------------------------------------*/ 1801 static struct _MatOps MatOps_Values = { 1802 MatSetValues_MPIBAIJ, 1803 MatGetRow_MPIBAIJ, 1804 MatRestoreRow_MPIBAIJ, 1805 MatMult_MPIBAIJ, 1806 MatMultAdd_MPIBAIJ, 1807 MatMultTrans_MPIBAIJ, 1808 MatMultTransAdd_MPIBAIJ, 1809 0, 1810 0, 1811 0, 1812 0, 1813 0, 1814 0, 1815 0, 1816 MatTranspose_MPIBAIJ, 1817 MatGetInfo_MPIBAIJ, 1818 MatEqual_MPIBAIJ, 1819 MatGetDiagonal_MPIBAIJ, 
1820 MatDiagonalScale_MPIBAIJ, 1821 MatNorm_MPIBAIJ, 1822 MatAssemblyBegin_MPIBAIJ, 1823 MatAssemblyEnd_MPIBAIJ, 1824 0, 1825 MatSetOption_MPIBAIJ, 1826 MatZeroEntries_MPIBAIJ, 1827 MatZeroRows_MPIBAIJ, 1828 0, 1829 0, 1830 0, 1831 0, 1832 MatGetSize_MPIBAIJ, 1833 MatGetLocalSize_MPIBAIJ, 1834 MatGetOwnershipRange_MPIBAIJ, 1835 0, 1836 0, 1837 0, 1838 0, 1839 MatDuplicate_MPIBAIJ, 1840 0, 1841 0, 1842 0, 1843 0, 1844 0, 1845 MatGetSubMatrices_MPIBAIJ, 1846 MatIncreaseOverlap_MPIBAIJ, 1847 MatGetValues_MPIBAIJ, 1848 0, 1849 MatPrintHelp_MPIBAIJ, 1850 MatScale_MPIBAIJ, 1851 0, 1852 0, 1853 0, 1854 MatGetBlockSize_MPIBAIJ, 1855 0, 1856 0, 1857 0, 1858 0, 1859 0, 1860 0, 1861 MatSetUnfactored_MPIBAIJ, 1862 0, 1863 MatSetValuesBlocked_MPIBAIJ, 1864 0, 1865 0, 1866 0, 1867 MatGetMaps_Petsc}; 1868 1869 1870 EXTERN_C_BEGIN 1871 #undef __FUNC__ 1872 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1873 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1874 { 1875 PetscFunctionBegin; 1876 *a = ((Mat_MPIBAIJ *)A->data)->A; 1877 *iscopy = PETSC_FALSE; 1878 PetscFunctionReturn(0); 1879 } 1880 EXTERN_C_END 1881 1882 #undef __FUNC__ 1883 #define __FUNC__ "MatCreateMPIBAIJ" 1884 /*@C 1885 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1886 (block compressed row). For good matrix assembly performance 1887 the user should preallocate the matrix storage by setting the parameters 1888 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1889 performance can be increased by more than a factor of 50. 1890 1891 Collective on MPI_Comm 1892 1893 Input Parameters: 1894 + comm - MPI communicator 1895 . bs - size of blockk 1896 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1897 This value should be the same as the local size used in creating the 1898 y vector for the matrix-vector product y = Ax. 1899 . 
n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor 
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1931 1932 Storage Information: 1933 For a square global matrix we define each processor's diagonal portion 1934 to be its local rows and the corresponding columns (a square submatrix); 1935 each processor's off-diagonal portion encompasses the remainder of the 1936 local matrix (a rectangular submatrix). 1937 1938 The user can specify preallocated storage for the diagonal part of 1939 the local submatrix with either d_nz or d_nnz (not both). Set 1940 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1941 memory allocation. Likewise, specify preallocated storage for the 1942 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1943 1944 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1945 the figure below we depict these three local rows and all columns (0-11). 1946 1947 .vb 1948 0 1 2 3 4 5 6 7 8 9 10 11 1949 ------------------- 1950 row 3 | o o o d d d o o o o o o 1951 row 4 | o o o d d d o o o o o o 1952 row 5 | o o o d d d o o o o o o 1953 ------------------- 1954 .ve 1955 1956 Thus, any entries in the d locations are stored in the d (diagonal) 1957 submatrix, and any entries in the o locations are stored in the 1958 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1959 stored simply in the MATSEQBAIJ format for compressed row storage. 1960 1961 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1962 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1963 In general, for PDE problems in which most nonzeros are near the diagonal, 1964 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1965 or you will get TERRIBLE performance; see the users' manual chapter on 1966 matrices. 

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int         flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* the block size may be overridden from the options database */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* validate block size and the preallocation arguments */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on one processor fall back to the simpler SeqBAIJ format unless the
     parallel structure was explicitly requested via the options database */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* consistency checks on which of the local/global sizes were supplied */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both a local and a global size are given, the local size wins and
     the global size is recomputed from it below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    /* global sizes are the sums of the local sizes over all processors */
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    /* split the block rows as evenly as possible across the processors */
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners/cowners/rowners_bs
     share one allocation of 3*(size+2) ints */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the gathered counts into ownership offsets */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  /* diagonal block A is m x n; off-diagonal block B is m x N (all columns) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIBAIJ"
/*
   Duplicates a parallel BAIJ matrix, copying the parallel layout and
   (depending on cpvalues) the numerical values of the two sequential blocks.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIBAIJ;
  mat->ops->view    = MatView_MPIBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the sizes and parallel layout from the original matrix */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: the table itself is not copied, only its parameters */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners    = a->rowners + a->size + 2;
  a->rowners_bs = a->cowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = 
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2230 if (oldmat->colmap) { 2231 #if defined (PETSC_USE_CTABLE) 2232 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2233 #else 2234 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2235 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2236 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2237 #endif 2238 } else a->colmap = 0; 2239 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2240 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2241 PLogObjectMemory(mat,len*sizeof(int)); 2242 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2243 } else a->garray = 0; 2244 2245 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2246 PLogObjectParent(mat,a->lvec); 2247 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2248 2249 PLogObjectParent(mat,a->Mvctx); 2250 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2251 PLogObjectParent(mat,a->A); 2252 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2253 PLogObjectParent(mat,a->B); 2254 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2255 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2256 if (flg) { 2257 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2258 } 2259 *newmat = mat; 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #include "sys.h" 2264 2265 #undef __FUNC__ 2266 #define __FUNC__ "MatLoad_MPIBAIJ" 2267 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2268 { 2269 Mat A; 2270 int i, nz, ierr, j,rstart, rend, fd; 2271 Scalar *vals,*buf; 2272 MPI_Comm comm = ((PetscObject)viewer)->comm; 2273 MPI_Status status; 2274 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2275 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2276 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2277 int 
*dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2278 int dcount,kmax,k,nzcount,tmp; 2279 2280 PetscFunctionBegin; 2281 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2282 2283 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2284 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2285 if (!rank) { 2286 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2287 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2288 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2289 if (header[3] < 0) { 2290 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2291 } 2292 } 2293 2294 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2295 M = header[1]; N = header[2]; 2296 2297 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2298 2299 /* 2300 This code adds extra rows to make sure the number of rows is 2301 divisible by the blocksize 2302 */ 2303 Mbs = M/bs; 2304 extra_rows = bs - M + bs*(Mbs); 2305 if (extra_rows == bs) extra_rows = 0; 2306 else Mbs++; 2307 if (extra_rows &&!rank) { 2308 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2309 } 2310 2311 /* determine ownership of all rows */ 2312 mbs = Mbs/size + ((Mbs % size) > rank); 2313 m = mbs * bs; 2314 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2315 browners = rowners + size + 1; 2316 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2317 rowners[0] = 0; 2318 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2319 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2320 rstart = rowners[rank]; 2321 rend = rowners[rank+1]; 2322 2323 /* distribute row lengths to all processors */ 2324 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2325 if (!rank) { 2326 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 
2327 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2328 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2329 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2330 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2331 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2332 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2333 } else { 2334 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2335 } 2336 2337 if (!rank) { 2338 /* calculate the number of nonzeros on each processor */ 2339 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2340 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2341 for ( i=0; i<size; i++ ) { 2342 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2343 procsnz[i] += rowlengths[j]; 2344 } 2345 } 2346 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2347 2348 /* determine max buffer needed and allocate it */ 2349 maxnz = 0; 2350 for ( i=0; i<size; i++ ) { 2351 maxnz = PetscMax(maxnz,procsnz[i]); 2352 } 2353 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2354 2355 /* read in my part of the matrix column indices */ 2356 nz = procsnz[0]; 2357 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2358 mycols = ibuf; 2359 if (size == 1) nz -= extra_rows; 2360 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2361 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2362 2363 /* read in every ones (except the last) and ship off */ 2364 for ( i=1; i<size-1; i++ ) { 2365 nz = procsnz[i]; 2366 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2367 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2368 } 2369 /* read in the stuff for the last proc */ 2370 if ( size != 1 ) { 2371 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2372 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2373 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2374 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2375 } 2376 ierr = PetscFree(cols);CHKERRQ(ierr); 2377 } else { 2378 /* determine buffer space needed for message */ 2379 nz = 0; 2380 for ( i=0; i<m; i++ ) { 2381 nz += locrowlens[i]; 2382 } 2383 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2384 mycols = ibuf; 2385 /* receive message of column indices*/ 2386 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2387 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2388 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2389 } 2390 2391 /* loop over local rows, determining number of off diagonal entries */ 2392 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2393 odlens = dlens + (rend-rstart); 2394 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2395 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2396 masked1 = mask + Mbs; 2397 masked2 = masked1 + Mbs; 2398 rowcount = 0; nzcount = 0; 2399 for ( i=0; i<mbs; i++ ) { 2400 dcount = 0; 2401 odcount = 0; 2402 for ( j=0; j<bs; j++ ) { 2403 kmax = locrowlens[rowcount]; 2404 for ( k=0; k<kmax; k++ ) { 2405 tmp = mycols[nzcount++]/bs; 2406 if (!mask[tmp]) { 2407 mask[tmp] = 1; 2408 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2409 else masked1[dcount++] = tmp; 2410 } 2411 } 2412 rowcount++; 2413 } 2414 2415 dlens[i] = dcount; 2416 odlens[i] = odcount; 2417 2418 /* zero out the mask elements we set */ 2419 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2420 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2421 } 2422 2423 /* create our matrix */ 2424 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2425 A = *newmat; 2426 MatSetOption(A,MAT_COLUMNS_SORTED); 2427 2428 if (!rank) { 
2429 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2430 /* read in my part of the matrix numerical values */ 2431 nz = procsnz[0]; 2432 vals = buf; 2433 mycols = ibuf; 2434 if (size == 1) nz -= extra_rows; 2435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2436 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2437 2438 /* insert into matrix */ 2439 jj = rstart*bs; 2440 for ( i=0; i<m; i++ ) { 2441 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2442 mycols += locrowlens[i]; 2443 vals += locrowlens[i]; 2444 jj++; 2445 } 2446 /* read in other processors (except the last one) and ship out */ 2447 for ( i=1; i<size-1; i++ ) { 2448 nz = procsnz[i]; 2449 vals = buf; 2450 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2451 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2452 } 2453 /* the last proc */ 2454 if ( size != 1 ){ 2455 nz = procsnz[i] - extra_rows; 2456 vals = buf; 2457 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2458 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2459 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2460 } 2461 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2462 } else { 2463 /* receive numeric values */ 2464 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2465 2466 /* receive message of values*/ 2467 vals = buf; 2468 mycols = ibuf; 2469 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2470 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2471 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2472 2473 /* insert into matrix */ 2474 jj = rstart*bs; 2475 for ( i=0; i<m; i++ ) { 2476 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2477 mycols += locrowlens[i]; 2478 vals += locrowlens[i]; 2479 jj++; 2480 } 2481 } 2482 ierr = 
PetscFree(locrowlens);CHKERRQ(ierr); 2483 ierr = PetscFree(buf);CHKERRQ(ierr); 2484 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2485 ierr = PetscFree(rowners);CHKERRQ(ierr); 2486 ierr = PetscFree(dlens);CHKERRQ(ierr); 2487 ierr = PetscFree(mask);CHKERRQ(ierr); 2488 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #undef __FUNC__ 2494 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2495 /*@ 2496 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2497 2498 Input Parameters: 2499 . mat - the matrix 2500 . fact - factor 2501 2502 Collective on Mat 2503 2504 Level: advanced 2505 2506 Notes: 2507 This can also be set by the command line option: -mat_use_hash_table fact 2508 2509 .keywords: matrix, hashtable, factor, HT 2510 2511 .seealso: MatSetOption() 2512 @*/ 2513 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2514 { 2515 Mat_MPIBAIJ *baij; 2516 2517 PetscFunctionBegin; 2518 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2519 if (mat->type != MATMPIBAIJ) { 2520 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2521 } 2522 baij = (Mat_MPIBAIJ*) mat->data; 2523 baij->ht_fact = fact; 2524 PetscFunctionReturn(0); 2525 } 2526