1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.177 1999/09/15 16:33:20 balay Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 EXTERN_C_BEGIN 20 #undef __FUNC__ 21 #define __FUNC__ "MatStoreValues_MPIBAIJ" 22 int MatStoreValues_MPIBAIJ(Mat mat) 23 { 24 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 25 int ierr; 26 27 PetscFunctionBegin; 28 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 29 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 EXTERN_C_END 33 34 EXTERN_C_BEGIN 35 #undef __FUNC__ 36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ" 37 int MatRetrieveValues_MPIBAIJ(Mat mat) 38 { 39 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 40 int ierr; 41 42 PetscFunctionBegin; 43 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 44 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 45 PetscFunctionReturn(0); 46 } 47 EXTERN_C_END 48 49 /* 50 Local utility routine that creates a mapping from the global column 51 number to the local number in the off-diagonal part of the local 52 storage of the matrix. This is done in a non scable way since the 53 length of colmap equals the global matrix length. 
54 */ 55 #undef __FUNC__ 56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 57 static int CreateColmap_MPIBAIJ_Private(Mat mat) 58 { 59 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 61 int nbs = B->nbs,i,bs=B->bs,ierr; 62 63 PetscFunctionBegin; 64 #if defined (PETSC_USE_CTABLE) 65 ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 66 for ( i=0; i<nbs; i++ ){ 67 ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 68 } 69 #else 70 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 71 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 72 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 73 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 74 #endif 75 PetscFunctionReturn(0); 76 } 77 78 #define CHUNKSIZE 10 79 80 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 81 { \ 82 \ 83 brow = row/bs; \ 84 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 85 rmax = aimax[brow]; nrow = ailen[brow]; \ 86 bcol = col/bs; \ 87 ridx = row % bs; cidx = col % bs; \ 88 low = 0; high = nrow; \ 89 while (high-low > 3) { \ 90 t = (low+high)/2; \ 91 if (rp[t] > bcol) high = t; \ 92 else low = t; \ 93 } \ 94 for ( _i=low; _i<high; _i++ ) { \ 95 if (rp[_i] > bcol) break; \ 96 if (rp[_i] == bcol) { \ 97 bap = ap + bs2*_i + bs*cidx + ridx; \ 98 if (addv == ADD_VALUES) *bap += value; \ 99 else *bap = value; \ 100 goto a_noinsert; \ 101 } \ 102 } \ 103 if (a->nonew == 1) goto a_noinsert; \ 104 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 105 if (nrow >= rmax) { \ 106 /* there is no extra room in row, therefore enlarge */ \ 107 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 108 Scalar *new_a; \ 109 \ 110 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 111 \ 112 /* malloc new storage space */ \ 113 len = 
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 114 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 115 new_j = (int *) (new_a + bs2*new_nz); \ 116 new_i = new_j + new_nz; \ 117 \ 118 /* copy over old data into new slots */ \ 119 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 120 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 121 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 122 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 123 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 124 len*sizeof(int));CHKERRQ(ierr); \ 125 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 126 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 127 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 128 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 129 /* free up old matrix storage */ \ 130 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 131 if (!a->singlemalloc) { \ 132 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 133 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 134 } \ 135 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 136 a->singlemalloc = 1; \ 137 \ 138 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 139 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 140 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 141 a->maxnz += bs2*CHUNKSIZE; \ 142 a->reallocs++; \ 143 a->nz++; \ 144 } \ 145 N = nrow++ - 1; \ 146 /* shift up all the later entries in this row */ \ 147 for ( ii=N; ii>=_i; ii-- ) { \ 148 rp[ii+1] = rp[ii]; \ 149 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 150 } \ 151 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \ 152 rp[_i] = bcol; \ 153 ap[bs2*_i + bs*cidx + ridx] = value; \ 154 a_noinsert:; \ 155 ailen[brow] = nrow; \ 156 } 157 158 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 159 { 
\ 160 \ 161 brow = row/bs; \ 162 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 163 rmax = bimax[brow]; nrow = bilen[brow]; \ 164 bcol = col/bs; \ 165 ridx = row % bs; cidx = col % bs; \ 166 low = 0; high = nrow; \ 167 while (high-low > 3) { \ 168 t = (low+high)/2; \ 169 if (rp[t] > bcol) high = t; \ 170 else low = t; \ 171 } \ 172 for ( _i=low; _i<high; _i++ ) { \ 173 if (rp[_i] > bcol) break; \ 174 if (rp[_i] == bcol) { \ 175 bap = ap + bs2*_i + bs*cidx + ridx; \ 176 if (addv == ADD_VALUES) *bap += value; \ 177 else *bap = value; \ 178 goto b_noinsert; \ 179 } \ 180 } \ 181 if (b->nonew == 1) goto b_noinsert; \ 182 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 183 if (nrow >= rmax) { \ 184 /* there is no extra room in row, therefore enlarge */ \ 185 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 186 Scalar *new_a; \ 187 \ 188 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 189 \ 190 /* malloc new storage space */ \ 191 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 192 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 193 new_j = (int *) (new_a + bs2*new_nz); \ 194 new_i = new_j + new_nz; \ 195 \ 196 /* copy over old data into new slots */ \ 197 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 198 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 199 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 200 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 201 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 202 len*sizeof(int));CHKERRQ(ierr); \ 203 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 204 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 205 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 206 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 207 
/* free up old matrix storage */ \ 208 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 209 if (!b->singlemalloc) { \ 210 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 211 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 212 } \ 213 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 214 b->singlemalloc = 1; \ 215 \ 216 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 217 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 218 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 219 b->maxnz += bs2*CHUNKSIZE; \ 220 b->reallocs++; \ 221 b->nz++; \ 222 } \ 223 N = nrow++ - 1; \ 224 /* shift up all the later entries in this row */ \ 225 for ( ii=N; ii>=_i; ii-- ) { \ 226 rp[ii+1] = rp[ii]; \ 227 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 228 } \ 229 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 230 rp[_i] = bcol; \ 231 ap[bs2*_i + bs*cidx + ridx] = value; \ 232 b_noinsert:; \ 233 bilen[brow] = nrow; \ 234 } 235 236 #undef __FUNC__ 237 #define __FUNC__ "MatSetValues_MPIBAIJ" 238 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 239 { 240 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 241 Scalar value; 242 int ierr,i,j,row,col; 243 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 244 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 245 int cend_orig=baij->cend_bs,bs=baij->bs; 246 247 /* Some Variables required in the macro */ 248 Mat A = baij->A; 249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 250 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 251 Scalar *aa=a->a; 252 253 Mat B = baij->B; 254 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 255 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 256 Scalar *ba=b->a; 257 258 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 259 int low,high,t,ridx,cidx,bs2=a->bs2; 260 Scalar *ap,*bap; 261 262 PetscFunctionBegin; 263 for ( i=0; i<m; i++ ) { 264 if (im[i] < 0) continue; 265 #if defined(PETSC_USE_BOPT_g) 266 
if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 267 #endif 268 if (im[i] >= rstart_orig && im[i] < rend_orig) { 269 row = im[i] - rstart_orig; 270 for ( j=0; j<n; j++ ) { 271 if (in[j] >= cstart_orig && in[j] < cend_orig){ 272 col = in[j] - cstart_orig; 273 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 274 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 275 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 276 } else if (in[j] < 0) continue; 277 #if defined(PETSC_USE_BOPT_g) 278 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 279 #endif 280 else { 281 if (mat->was_assembled) { 282 if (!baij->colmap) { 283 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 #if defined (PETSC_USE_CTABLE) 286 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr); 287 col = col - 1 + in[j]%bs; 288 #else 289 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 290 #endif 291 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 292 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 293 col = in[j]; 294 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 295 B = baij->B; 296 b = (Mat_SeqBAIJ *) (B)->data; 297 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 298 ba=b->a; 299 } 300 } else col = in[j]; 301 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 302 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 303 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 304 } 305 } 306 } else { 307 if ( !baij->donotstash) { 308 if (roworiented) { 309 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 310 } else { 311 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 312 } 313 } 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 321 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int 
*im,int n,int *in,Scalar *v,InsertMode addv) 322 { 323 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 324 Scalar *value,*barray=baij->barray; 325 int ierr,i,j,ii,jj,row,col; 326 int roworiented = baij->roworiented,rstart=baij->rstart ; 327 int rend=baij->rend,cstart=baij->cstart,stepval; 328 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 329 330 PetscFunctionBegin; 331 if(!barray) { 332 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 333 } 334 335 if (roworiented) { 336 stepval = (n-1)*bs; 337 } else { 338 stepval = (m-1)*bs; 339 } 340 for ( i=0; i<m; i++ ) { 341 if (im[i] < 0) continue; 342 #if defined(PETSC_USE_BOPT_g) 343 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 344 #endif 345 if (im[i] >= rstart && im[i] < rend) { 346 row = im[i] - rstart; 347 for ( j=0; j<n; j++ ) { 348 /* If NumCol = 1 then a copy is not required */ 349 if ((roworiented) && (n == 1)) { 350 barray = v + i*bs2; 351 } else if((!roworiented) && (m == 1)) { 352 barray = v + j*bs2; 353 } else { /* Here a copy is required */ 354 if (roworiented) { 355 value = v + i*(stepval+bs)*bs + j*bs; 356 } else { 357 value = v + j*(stepval+bs)*bs + i*bs; 358 } 359 for ( ii=0; ii<bs; ii++,value+=stepval ) { 360 for (jj=0; jj<bs; jj++ ) { 361 *barray++ = *value++; 362 } 363 } 364 barray -=bs2; 365 } 366 367 if (in[j] >= cstart && in[j] < cend){ 368 col = in[j] - cstart; 369 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 370 } 371 else if (in[j] < 0) continue; 372 #if defined(PETSC_USE_BOPT_g) 373 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 374 #endif 375 else { 376 if (mat->was_assembled) { 377 if (!baij->colmap) { 378 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 379 } 380 381 #if defined(PETSC_USE_BOPT_g) 382 #if defined (PETSC_USE_CTABLE) 383 { int data; 384 ierr = 
TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 385 if((data - 1) % bs) 386 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 387 } 388 #else 389 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 390 #endif 391 #endif 392 #if defined (PETSC_USE_CTABLE) 393 ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 394 col = (col - 1)/bs; 395 #else 396 col = (baij->colmap[in[j]] - 1)/bs; 397 #endif 398 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 399 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 400 col = in[j]; 401 } 402 } 403 else col = in[j]; 404 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 405 } 406 } 407 } else { 408 if (!baij->donotstash) { 409 if (roworiented) { 410 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 411 } else { 412 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 413 } 414 } 415 } 416 } 417 PetscFunctionReturn(0); 418 } 419 #define HASH_KEY 0.6180339887 420 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 421 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 422 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 423 #undef __FUNC__ 424 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 425 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 426 { 427 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 428 int ierr,i,j,row,col; 429 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 430 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 431 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 432 double tmp; 433 Scalar ** HD = baij->hd,value; 434 #if defined(PETSC_USE_BOPT_g) 435 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 436 #endif 437 438 PetscFunctionBegin; 439 440 for ( i=0; i<m; i++ ) { 441 #if defined(PETSC_USE_BOPT_g) 442 if (im[i] < 0) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 443 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 444 #endif 445 row = im[i]; 446 if (row >= rstart_orig && row < rend_orig) { 447 for ( j=0; j<n; j++ ) { 448 col = in[j]; 449 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 450 /* Look up into the Hash Table */ 451 key = (row/bs)*Nbs+(col/bs)+1; 452 h1 = HASH(size,key,tmp); 453 454 455 idx = h1; 456 #if defined(PETSC_USE_BOPT_g) 457 insert_ct++; 458 total_ct++; 459 if (HT[idx] != key) { 460 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 461 if (idx == size) { 462 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 463 if (idx == h1) { 464 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 465 } 466 } 467 } 468 #else 469 if (HT[idx] != key) { 470 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 471 if (idx == size) { 472 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 473 if (idx == h1) { 474 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 475 } 476 } 477 } 478 #endif 479 /* A HASH table entry is found, so insert the values at the correct address */ 480 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 481 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 482 } 483 } else { 484 if (!baij->donotstash) { 485 if (roworiented) { 486 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 487 } else { 488 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 #if defined(PETSC_USE_BOPT_g) 494 baij->ht_total_ct = total_ct; 495 baij->ht_insert_ct = insert_ct; 496 #endif 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNC__ 501 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 502 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 503 { 504 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 505 int 
ierr,i,j,ii,jj,row,col; 506 int roworiented = baij->roworiented,rstart=baij->rstart ; 507 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 508 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 509 double tmp; 510 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 511 #if defined(PETSC_USE_BOPT_g) 512 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 513 #endif 514 515 PetscFunctionBegin; 516 517 if (roworiented) { 518 stepval = (n-1)*bs; 519 } else { 520 stepval = (m-1)*bs; 521 } 522 for ( i=0; i<m; i++ ) { 523 #if defined(PETSC_USE_BOPT_g) 524 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 525 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 526 #endif 527 row = im[i]; 528 v_t = v + i*bs2; 529 if (row >= rstart && row < rend) { 530 for ( j=0; j<n; j++ ) { 531 col = in[j]; 532 533 /* Look up into the Hash Table */ 534 key = row*Nbs+col+1; 535 h1 = HASH(size,key,tmp); 536 537 idx = h1; 538 #if defined(PETSC_USE_BOPT_g) 539 total_ct++; 540 insert_ct++; 541 if (HT[idx] != key) { 542 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 543 if (idx == size) { 544 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 545 if (idx == h1) { 546 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 547 } 548 } 549 } 550 #else 551 if (HT[idx] != key) { 552 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 553 if (idx == size) { 554 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 555 if (idx == h1) { 556 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 557 } 558 } 559 } 560 #endif 561 baij_a = HD[idx]; 562 if (roworiented) { 563 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 564 /* value = v + (i*(stepval+bs)+j)*bs; */ 565 value = v_t; 566 v_t += bs; 567 if (addv == ADD_VALUES) { 568 for ( ii=0; ii<bs; ii++,value+=stepval) { 569 for ( jj=ii; jj<bs2; jj+=bs ) { 570 baij_a[jj] += *value++; 571 } 572 } 573 } else { 574 for ( ii=0; ii<bs; 
ii++,value+=stepval) { 575 for ( jj=ii; jj<bs2; jj+=bs ) { 576 baij_a[jj] = *value++; 577 } 578 } 579 } 580 } else { 581 value = v + j*(stepval+bs)*bs + i*bs; 582 if (addv == ADD_VALUES) { 583 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 584 for ( jj=0; jj<bs; jj++ ) { 585 baij_a[jj] += *value++; 586 } 587 } 588 } else { 589 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 590 for ( jj=0; jj<bs; jj++ ) { 591 baij_a[jj] = *value++; 592 } 593 } 594 } 595 } 596 } 597 } else { 598 if (!baij->donotstash) { 599 if (roworiented) { 600 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 601 } else { 602 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 603 } 604 } 605 } 606 } 607 #if defined(PETSC_USE_BOPT_g) 608 baij->ht_total_ct = total_ct; 609 baij->ht_insert_ct = insert_ct; 610 #endif 611 PetscFunctionReturn(0); 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatGetValues_MPIBAIJ" 616 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 617 { 618 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 619 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 620 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 621 622 PetscFunctionBegin; 623 for ( i=0; i<m; i++ ) { 624 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 625 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 626 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 627 row = idxm[i] - bsrstart; 628 for ( j=0; j<n; j++ ) { 629 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 630 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 631 if (idxn[j] >= bscstart && idxn[j] < bscend){ 632 col = idxn[j] - bscstart; 633 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 634 } else { 635 if (!baij->colmap) { 636 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 637 } 638 #if defined (PETSC_USE_CTABLE) 639 ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 640 data --; 641 #else 642 data = baij->colmap[idxn[j]/bs]-1; 643 #endif 644 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 645 else { 646 col = data + idxn[j]%bs; 647 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 648 } 649 } 650 } 651 } else { 652 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 653 } 654 } 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNC__ 659 #define __FUNC__ "MatNorm_MPIBAIJ" 660 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 661 { 662 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 663 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 664 int ierr, i,bs2=baij->bs2; 665 double sum = 0.0; 666 Scalar *v; 667 668 PetscFunctionBegin; 669 if (baij->size == 1) { 670 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 671 } else { 672 if (type == NORM_FROBENIUS) { 673 v = amat->a; 674 for (i=0; i<amat->nz*bs2; i++ ) { 675 #if defined(PETSC_USE_COMPLEX) 676 sum += PetscReal(PetscConj(*v)*(*v)); v++; 677 #else 678 sum += (*v)*(*v); v++; 679 #endif 680 } 681 v = bmat->a; 682 for (i=0; i<bmat->nz*bs2; i++ ) { 683 #if defined(PETSC_USE_COMPLEX) 684 sum += PetscReal(PetscConj(*v)*(*v)); v++; 685 #else 686 sum += (*v)*(*v); v++; 687 #endif 688 } 689 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 690 *norm = sqrt(*norm); 691 } else { 692 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 693 } 694 } 695 PetscFunctionReturn(0); 696 } 697 698 699 /* 700 Creates the hash table, and sets the table 701 This table is created only once. 702 If new entried need to be added to the matrix 703 then the hash table has to be destroyed and 704 recreated. 
705 */ 706 #undef __FUNC__ 707 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 708 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 709 { 710 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 711 Mat A = baij->A, B=baij->B; 712 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 713 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 714 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 715 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 716 int *HT,key; 717 Scalar **HD; 718 double tmp; 719 #if defined(PETSC_USE_BOPT_g) 720 int ct=0,max=0; 721 #endif 722 723 PetscFunctionBegin; 724 baij->ht_size=(int)(factor*nz); 725 size = baij->ht_size; 726 727 if (baij->ht) { 728 PetscFunctionReturn(0); 729 } 730 731 /* Allocate Memory for Hash Table */ 732 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 733 baij->ht = (int*)(baij->hd + size); 734 HD = baij->hd; 735 HT = baij->ht; 736 737 738 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 739 740 741 /* Loop Over A */ 742 for ( i=0; i<a->mbs; i++ ) { 743 for ( j=ai[i]; j<ai[i+1]; j++ ) { 744 row = i+rstart; 745 col = aj[j]+cstart; 746 747 key = row*Nbs + col + 1; 748 h1 = HASH(size,key,tmp); 749 for ( k=0; k<size; k++ ){ 750 if (HT[(h1+k)%size] == 0.0) { 751 HT[(h1+k)%size] = key; 752 HD[(h1+k)%size] = a->a + j*bs2; 753 break; 754 #if defined(PETSC_USE_BOPT_g) 755 } else { 756 ct++; 757 #endif 758 } 759 } 760 #if defined(PETSC_USE_BOPT_g) 761 if (k> max) max = k; 762 #endif 763 } 764 } 765 /* Loop Over B */ 766 for ( i=0; i<b->mbs; i++ ) { 767 for ( j=bi[i]; j<bi[i+1]; j++ ) { 768 row = i+rstart; 769 col = garray[bj[j]]; 770 key = row*Nbs + col + 1; 771 h1 = HASH(size,key,tmp); 772 for ( k=0; k<size; k++ ){ 773 if (HT[(h1+k)%size] == 0.0) { 774 HT[(h1+k)%size] = key; 775 HD[(h1+k)%size] = b->a + j*bs2; 776 break; 777 #if defined(PETSC_USE_BOPT_g) 778 } else { 779 ct++; 780 #endif 781 } 782 } 783 
#if defined(PETSC_USE_BOPT_g) 784 if (k> max) max = k; 785 #endif 786 } 787 } 788 789 /* Print Summary */ 790 #if defined(PETSC_USE_BOPT_g) 791 for ( i=0,j=0; i<size; i++) 792 if (HT[i]) {j++;} 793 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 794 (j== 0)? 0.0:((double)(ct+j))/j,max); 795 #endif 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNC__ 800 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 801 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 804 int ierr,nstash,reallocs; 805 InsertMode addv; 806 807 PetscFunctionBegin; 808 if (baij->donotstash) { 809 PetscFunctionReturn(0); 810 } 811 812 /* make sure all processors are either in INSERTMODE or ADDMODE */ 813 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 814 if (addv == (ADD_VALUES|INSERT_VALUES)) { 815 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 816 } 817 mat->insertmode = addv; /* in case this processor had no cache */ 818 819 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 820 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 821 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 822 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 823 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 824 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNC__ 829 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 830 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 831 { 832 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 833 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 834 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 835 int 
*row,*col,other_disassembled,r1,r2,r3; 836 Scalar *val; 837 InsertMode addv = mat->insertmode; 838 839 PetscFunctionBegin; 840 if (!baij->donotstash) { 841 while (1) { 842 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 843 if (!flg) break; 844 845 for ( i=0; i<n; ) { 846 /* Now identify the consecutive vals belonging to the same row */ 847 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 848 if (j < n) ncols = j-i; 849 else ncols = n-i; 850 /* Now assemble all these values with a single function call */ 851 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 852 i = j; 853 } 854 } 855 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 856 /* Now process the block-stash. Since the values are stashed column-oriented, 857 set the roworiented flag to column oriented, and after MatSetValues() 858 restore the original flags */ 859 r1 = baij->roworiented; 860 r2 = a->roworiented; 861 r3 = b->roworiented; 862 baij->roworiented = 0; 863 a->roworiented = 0; 864 b->roworiented = 0; 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for ( i=0; i<n; ) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 875 i = j; 876 } 877 } 878 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 879 baij->roworiented = r1; 880 a->roworiented = r2; 881 b->roworiented = r3; 882 } 883 884 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 885 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 886 887 /* determine if any processor has disassembled, if so we must 888 also disassemble ourselfs, in order that we may reassemble. 
*/
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is 0 iff ANY process disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* debug build only: report hash-table search statistics gathered during MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if hash-table assembly was requested but not yet built, build it now and
     switch the SetValues entry points to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* workspace used by MatGetRow; invalidated by a new assembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewing kernel shared by the ASCII,
   draw and socket (and binary) viewers.  Info formats are printed per process;
   otherwise the whole matrix is gathered onto process 0 and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* sequential phase: each process prints its local info in rank order */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* process 0 owns all rows of the gathered copy; everyone else owns none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs global (point) row numbers of one block row */
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          /* one column of the bs x bs block at a time (blocks stored column-major) */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local off-diagonal block columns to global */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
__FUNC__ 1051 #define __FUNC__ "MatView_MPIBAIJ" 1052 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1053 { 1054 int ierr; 1055 ViewerType vtype; 1056 1057 PetscFunctionBegin; 1058 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1059 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1060 PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) { 1061 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1062 /* 1063 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1064 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1065 */ 1066 } else { 1067 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1068 } 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNC__ 1073 #define __FUNC__ "MatDestroy_MPIBAIJ" 1074 int MatDestroy_MPIBAIJ(Mat mat) 1075 { 1076 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1077 int ierr; 1078 1079 PetscFunctionBegin; 1080 1081 if (mat->mapping) { 1082 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1083 } 1084 if (mat->bmapping) { 1085 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1086 } 1087 if (mat->rmap) { 1088 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1089 } 1090 if (mat->cmap) { 1091 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1092 } 1093 #if defined(PETSC_USE_LOG) 1094 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1095 #endif 1096 1097 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1098 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1099 1100 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1101 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1102 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1103 #if defined (PETSC_USE_CTABLE) 1104 if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);} 1105 #else 1106 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1107 #endif 1108 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1109 if 
(baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1110 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1111 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1112 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1113 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1114 ierr = PetscFree(baij);CHKERRQ(ierr); 1115 PLogObjectDestroy(mat); 1116 PetscHeaderDestroy(mat); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNC__ 1121 #define __FUNC__ "MatMult_MPIBAIJ" 1122 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1123 { 1124 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1125 int ierr, nt; 1126 1127 PetscFunctionBegin; 1128 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1129 if (nt != a->n) { 1130 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1131 } 1132 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1133 if (nt != a->m) { 1134 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1135 } 1136 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1137 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1138 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1139 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1140 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNC__ 1145 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1146 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1147 { 1148 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1149 int ierr; 1150 1151 PetscFunctionBegin; 1152 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1153 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1154 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1155 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIBAIJ"
/* MatMultTrans_MPIBAIJ - yy = A'*xx (transpose multiply). */
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/* MatMultTransAdd_MPIBAIJ - zz = yy + A'*xx. */
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* MatScale_MPIBAIJ - scales both the diagonal and off-diagonal blocks by *aa. */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* MatGetSize_MPIBAIJ - returns global dimensions; either pointer may be null. */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* Ownership range in point (not block) rows: [rstart*bs, rend*bs). */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one (global) point row of the matrix, merging
   the local diagonal (A) and off-diagonal (B) rows into globally, increasingly
   numbered columns.  Only rows owned by this process may be requested.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one buffer holds both values and (after them) the indices */
    mat->rowvalues  = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes cstart */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
/* MatRestoreRow_MPIBAIJ - releases the row lock taken by MatGetRow (workspace is reused). */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
/* MatZeroEntries_MPIBAIJ - zeros both sequential blocks, keeping the nonzero structure. */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
(Mat_MPIBAIJ *) matin->data; 1389 Mat A = a->A, B = a->B; 1390 int ierr; 1391 double isend[5], irecv[5]; 1392 1393 PetscFunctionBegin; 1394 info->block_size = (double)a->bs; 1395 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1396 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1397 isend[3] = info->memory; isend[4] = info->mallocs; 1398 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1399 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1400 isend[3] += info->memory; isend[4] += info->mallocs; 1401 if (flag == MAT_LOCAL) { 1402 info->nz_used = isend[0]; 1403 info->nz_allocated = isend[1]; 1404 info->nz_unneeded = isend[2]; 1405 info->memory = isend[3]; 1406 info->mallocs = isend[4]; 1407 } else if (flag == MAT_GLOBAL_MAX) { 1408 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1409 info->nz_used = irecv[0]; 1410 info->nz_allocated = irecv[1]; 1411 info->nz_unneeded = irecv[2]; 1412 info->memory = irecv[3]; 1413 info->mallocs = irecv[4]; 1414 } else if (flag == MAT_GLOBAL_SUM) { 1415 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1416 info->nz_used = irecv[0]; 1417 info->nz_allocated = irecv[1]; 1418 info->nz_unneeded = irecv[2]; 1419 info->memory = irecv[3]; 1420 info->mallocs = irecv[4]; 1421 } else { 1422 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1423 } 1424 info->rows_global = (double)a->M; 1425 info->columns_global = (double)a->N; 1426 info->rows_local = (double)a->m; 1427 info->columns_local = (double)a->N; 1428 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1429 info->fill_ratio_needed = 0; 1430 info->factor_mallocs = 0; 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNC__ 1435 #define __FUNC__ "MatSetOption_MPIBAIJ" 1436 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1437 { 1438 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1439 int ierr; 1440 1441 PetscFunctionBegin; 1442 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1443 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1444 op == MAT_COLUMNS_UNSORTED || 1445 op == MAT_COLUMNS_SORTED || 1446 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1447 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1448 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1449 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1450 } else if (op == MAT_ROW_ORIENTED) { 1451 a->roworiented = 1; 1452 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1453 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1454 } else if (op == MAT_ROWS_SORTED || 1455 op == MAT_ROWS_UNSORTED || 1456 op == MAT_SYMMETRIC || 1457 op == MAT_STRUCTURALLY_SYMMETRIC || 1458 op == MAT_YES_NEW_DIAGONALS || 1459 op == MAT_USE_HASH_TABLE) { 1460 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1461 } else if (op == MAT_COLUMN_ORIENTED) { 1462 a->roworiented = 0; 1463 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1464 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1465 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1466 a->donotstash = 1; 1467 } else if (op == MAT_NO_NEW_DIAGONALS) { 1468 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1469 } else if (op == MAT_USE_HASH_TABLE) { 1470 a->ht_flag = 1; 1471 } else { 1472 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1473 } 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNC__ 1478 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1479 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1480 { 1481 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1482 Mat_SeqBAIJ *Aloc; 1483 Mat B; 1484 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1485 int bs=baij->bs,mbs=baij->mbs; 1486 Scalar *a; 1487 1488 PetscFunctionBegin; 1489 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1490 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1491 CHKERRQ(ierr); 1492 1493 /* copy over the A part */ 1494 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1495 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1496 
  /* rvals holds the bs global (point) row numbers of the current block row;
     they become column indices of the transpose */
  rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        /* insert one block column as a row of B (this transposes the block) */
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part; garray maps local off-diagonal block columns to global */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
    ierr = MatDestroy(baij->A);CHKERRQ(ierr);
    ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
#else
    if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
    if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
    if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
    if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(baij);CHKERRQ(ierr);

    /*
        This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    /* overwrite A's header with B's, then restore A's function tables */
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - mat = diag(ll)*mat*diag(rr).  The scatter of rr
   (needed to right-scale the off-diagonal block) is overlapped with the
   left-scaling and the diagonal-block scaling.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (global point) rows listed in is, optionally
   placing *diag on the diagonal.  Rows owned by other processes are shipped
   to their owners with point-to-point MPI messages.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds the block-row partitioning; scale by bs for point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* starts[] was consumed by the bucketing loop above; rebuild it */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point row owned by this process */
  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: set diagonal entries one at a time */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* MatPrintHelp_MPIBAIJ - prints the MPIBAIJ runtime options (once per run). */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;
  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  /* print the MPIBAIJ-specific options only once per run */
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* MatSetUnfactored_MPIBAIJ - clears the factored state of the diagonal block. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   MatEqual_MPIBAIJ - Two MPIBAIJ matrices are equal iff every process finds
   its local A and B blocks equal (combined with a logical-AND reduction).
*/
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIBAIJ type; entries are POSITIONAL (they must
   match the slot order of struct _MatOps) — do not reorder.  0 marks an
   unimplemented operation. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns the local diagonal block itself (no copy is made: *iscopy = PETSC_FALSE). */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.
n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1904 This value should be the same as the local size used in creating the 1905 x vector for the matrix-vector product y = Ax. 1906 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1907 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1908 . d_nz - number of block nonzeros per block row in diagonal portion of local 1909 submatrix (same for all local rows) 1910 . d_nnz - array containing the number of block nonzeros in the various block rows 1911 of the in diagonal portion of the local (possibly different for each block 1912 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1913 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1914 submatrix (same for all local rows). 1915 - o_nnz - array containing the number of nonzeros in the various block rows of the 1916 off-diagonal portion of the local submatrix (possibly different for 1917 each block row) or PETSC_NULL. 1918 1919 Output Parameter: 1920 . A - the matrix 1921 1922 Options Database Keys: 1923 . -mat_no_unroll - uses code that does not unroll the loops in the 1924 block calculations (much slower) 1925 . -mat_block_size - size of the blocks to use 1926 . -mat_mpi - use the parallel matrix data structures even on one processor 1927 (defaults to using SeqBAIJ format on one processor) 1928 1929 Notes: 1930 The user MUST specify either the local or global matrix dimensions 1931 (possibly both). 1932 1933 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1934 than it must be used on all processors that share the object for that argument. 
1935 1936 Storage Information: 1937 For a square global matrix we define each processor's diagonal portion 1938 to be its local rows and the corresponding columns (a square submatrix); 1939 each processor's off-diagonal portion encompasses the remainder of the 1940 local matrix (a rectangular submatrix). 1941 1942 The user can specify preallocated storage for the diagonal part of 1943 the local submatrix with either d_nz or d_nnz (not both). Set 1944 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1945 memory allocation. Likewise, specify preallocated storage for the 1946 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1947 1948 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1949 the figure below we depict these three local rows and all columns (0-11). 1950 1951 .vb 1952 0 1 2 3 4 5 6 7 8 9 10 11 1953 ------------------- 1954 row 3 | o o o d d d o o o o o o 1955 row 4 | o o o d d d o o o o o o 1956 row 5 | o o o d d d o o o o o o 1957 ------------------- 1958 .ve 1959 1960 Thus, any entries in the d locations are stored in the d (diagonal) 1961 submatrix, and any entries in the o locations are stored in the 1962 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1963 stored simply in the MATSEQBAIJ format for compressed row storage. 1964 1965 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1966 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1967 In general, for PDE problems in which most nonzeros are near the diagonal, 1968 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1969 or you will get TERRIBLE performance; see the users' manual chapter on 1970 matrices. 

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* the block size may be overridden from the options database */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* sanity-check the block size and the preallocation arguments */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on a single process (unless -mat_mpi/-mat_mpibaij is given) fall back to
     the sequential BAIJ format; NOTE(review): o_nz/o_nnz are not forwarded on
     this path -- the off-diagonal part is empty on one process */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* the nnz arrays are indexed by local block row, so local sizes are required */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win and the
     global sizes are recomputed from them below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* global sizes are the sum of the local sizes over the communicator */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* local sizes, if unspecified, are an (almost) even split of the block rows/cols */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* cache the sizes both in the implementation struct and the Mat header */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds the
     three tables rowners/cowners/rowners_bs, each of size b->size+2 */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the gathered block-row counts into ownership offsets */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  /* diagonal part A is m x n; off-diagonal part B is m x N (all non-local columns) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table based assembly; factors <= 1.0 fall back to 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* publish the type-specific operations so the generic dispatchers can find them */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIBAIJ",
                                    (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIBAIJ",
                                    (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIBAIJ",
                                    (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIBAIJ"
/*
   Creates a new MATMPIBAIJ matrix with the same layout as matin; the values
   are copied or not according to cpvalues (passed through to MatDuplicate of
   the sequential A and B parts).
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIBAIJ;
  mat->ops->view    = MatView_MPIBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy over the size/layout description from the old matrix */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart      = oldmat->rstart;
  a->rend        = oldmat->rend;
  a->cstart      = oldmat->cstart;
  a->cend        = oldmat->cend;
  a->size        = oldmat->size;
  a->rank        = oldmat->rank;
  a->donotstash  = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-object scratch state starts out empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff; the table itself is rebuilt on demand, only the
     configuration (flag and factor) carries over */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* one allocation for the three ownership tables, mirroring MatCreateMPIBAIJ */
  a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners    = a->rowners + a->size + 2;
  a->rowners_bs = a->cowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr =
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2234 if (oldmat->colmap) { 2235 #if defined (PETSC_USE_CTABLE) 2236 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2237 #else 2238 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2239 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2240 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2241 #endif 2242 } else a->colmap = 0; 2243 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2244 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2245 PLogObjectMemory(mat,len*sizeof(int)); 2246 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2247 } else a->garray = 0; 2248 2249 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2250 PLogObjectParent(mat,a->lvec); 2251 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2252 2253 PLogObjectParent(mat,a->Mvctx); 2254 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2255 PLogObjectParent(mat,a->A); 2256 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2257 PLogObjectParent(mat,a->B); 2258 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2259 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2260 if (flg) { 2261 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2262 } 2263 *newmat = mat; 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #include "sys.h" 2268 2269 #undef __FUNC__ 2270 #define __FUNC__ "MatLoad_MPIBAIJ" 2271 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2272 { 2273 Mat A; 2274 int i, nz, ierr, j,rstart, rend, fd; 2275 Scalar *vals,*buf; 2276 MPI_Comm comm = ((PetscObject)viewer)->comm; 2277 MPI_Status status; 2278 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2279 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2280 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2281 int 
*dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only rank 0 touches the file; header is (cookie, M, N, special-format flag) */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; browners holds the same offsets in
     point (non-block) rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs * bs;
  rowners  = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* padding rows carry a single (diagonal) entry */
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    /* on one process the padding entries are not on disk; synthesize them */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask marks block columns already seen within the current block row */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;   /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix, preallocated exactly from the counted block lengths */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padding rows get a unit diagonal so the matrix stays nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if ( size != 1 ){
      /* NOTE(review): this relies on i == size-1 being left over from the loop
         above (and i is then reused below); the column-index pass spells out
         procsnz[size-1] instead -- confirm/align */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  /* only meaningful for the parallel BAIJ implementation */
  if (mat->type != MATMPIBAIJ) {
    SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij = (Mat_MPIBAIJ*) mat->data;
  /* NOTE(review): fact is stored unvalidated here; the options-database path
     in MatCreateMPIBAIJ rejects factors <= 1.0 -- confirm intended */
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}