/*$Id: mpisbaij.c,v 1.30 2000/10/17 16:59:32 hzhang Exp bsmith $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "petscmat.h" I*/
#include "src/vec/vecimpl.h"
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

extern int MatSetUpMultiply_MPISBAIJ(Mat);
extern int DisAssemble_MPISBAIJ(Mat);
extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqSBAIJ(Mat);
extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
extern int MatZeroRows_SeqBAIJ(Mat,IS,Scalar *);
extern int MatGetRowMax_MPISBAIJ(Mat,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == Scalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ"
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ"
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
78 */ 79 #undef __FUNC__ 80 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap 
= ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNC__ 246 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 257 b->setvalueslen = N; 258 } 
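  /* Descriptive note (added comment): copy the user's Scalar values into the MatScalar
     (possibly single precision) staging buffer before insertion, as described in the
     "UGLY" comment near the top of this file. */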
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
319 */ 320 #undef __FUNC__ 321 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 PetscTruth roworiented = baij->roworiented; 327 int ierr,i,j,row,col; 328 int rstart_orig=baij->rstart_bs; 329 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 330 int cend_orig=baij->cend_bs,bs=baij->bs; 331 332 /* Some Variables required in the macro */ 333 Mat A = baij->A; 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 335 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 336 MatScalar *aa=a->a; 337 338 Mat B = baij->B; 339 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 340 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 341 MatScalar *ba=b->a; 342 343 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 344 int low,high,t,ridx,cidx,bs2=a->bs2; 345 MatScalar *ap,*bap; 346 347 /* for stash */ 348 int n_loc, *in_loc=0; 349 MatScalar *v_loc=0; 350 351 PetscFunctionBegin; 352 353 if(!baij->donotstash){ 354 in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc); 355 v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc); 356 } 357 358 for (i=0; i<m; i++) { 359 if (im[i] < 0) continue; 360 #if defined(PETSC_USE_BOPT_g) 361 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 362 #endif 363 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 364 row = im[i] - rstart_orig; /* local row index */ 365 for (j=0; j<n; j++) { 366 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 367 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 368 col = in[j] - cstart_orig; /* local col index */ 369 brow = row/bs; bcol = col/bs; 370 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 371 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 372 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 373 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 374 } else if (in[j] < 0) continue; 375 #if defined(PETSC_USE_BOPT_g) 376 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 377 #endif 378 else { /* off-diag entry (B) */ 379 if (mat->was_assembled) { 380 if (!baij->colmap) { 381 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 382 } 383 #if defined (PETSC_USE_CTABLE) 384 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 385 col = col - 1 + in[j]%bs; 386 #else 387 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 388 #endif 389 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 390 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 391 col = in[j]; 392 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 393 B = baij->B; 394 b = (Mat_SeqBAIJ*)(B)->data; 395 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 396 ba=b->a; 397 } 398 } else col = in[j]; 399 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 400 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 401 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 402 } 403 } 404 } else { /* off processor entry */ 405 if (!baij->donotstash) { 406 n_loc = 0; 407 for (j=0; j<n; j++){ 408 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 409 in_loc[n_loc] = in[j]; 410 if (roworiented) { 411 v_loc[n_loc] = v[i*n+j]; 412 } else { 413 v_loc[n_loc] = v[j*m+i]; 414 } 415 n_loc++; 416 } 417 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 418 } 419 } 420 } 421 422 if(!baij->donotstash){ 423 ierr = PetscFree(in_loc);CHKERRQ(ierr); 424 ierr = PetscFree(v_loc);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNC__ 430 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 432 { 433 PetscFunctionBegin; 434 SETERRQ(1,"Function not yet written for SBAIJ format"); 435 /* PetscFunctionReturn(0); */ 436 } 437 438 #define HASH_KEY 0.6180339887 439 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 440 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 441 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 442 #undef __FUNC__ 443 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar" 444 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 445 { 446 PetscFunctionBegin; 447 SETERRQ(1,"Function not yet written for SBAIJ format"); 448 /* PetscFunctionReturn(0); */ 449 } 450 451 #undef __FUNC__ 452 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 453 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 454 { 455 PetscFunctionBegin; 456 SETERRQ(1,"Function not yet written for SBAIJ format"); 457 /* PetscFunctionReturn(0); */ 458 } 459 460 #undef __FUNC__ 461 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 462 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 463 { 464 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 465 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 466 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 467 468 PetscFunctionBegin; 469 for (i=0; i<m; i++) { 470 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 471 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 472 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 473 row = idxm[i] - bsrstart; 474 for (j=0; j<n; j++) { 475 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 476 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 477 if (idxn[j] >= bscstart && idxn[j] < bscend){ 478 col = idxn[j] - bscstart; 479 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 480 } else { 481 if (!baij->colmap) { 482 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 483 } 484 #if defined (PETSC_USE_CTABLE) 485 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 486 data --; 487 #else 488 data = baij->colmap[idxn[j]/bs]-1; 489 #endif 490 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 491 else { 492 col = data + idxn[j]%bs; 493 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 494 } 495 } 496 } 497 } else { 498 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 499 } 500 } 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNC__ 505 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 506 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 507 { 508 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 509 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 510 /* Mat_SeqBAIJ *bmat = 
(Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* Only the upper triangle is stored, so each off-diagonal block contributes twice:
         ||mat||_F^2 = ||A||_F^2 + 2*||B||_F^2, summed over all processes */
      lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;   /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;   /* square of norm(B) */
      /*
      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
      PetscSynchronizedFlush(PETSC_COMM_WORLD); */

      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
  int          i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int          *row,*col,other_disassembled;
  PetscTruth   r1,r2,r3;
  MatScalar    *val;
  InsertMode   addv = mat->insertmode;
  int          rank;

  PetscFunctionBegin;
  /* remove the 2 lines below later */
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
      PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 658 /* 659 if nonzero structure of submatrix B cannot change then we know that 660 no processor disassembled thus we can skip this stuff 661 */ 662 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 663 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 664 if (mat->was_assembled && !other_disassembled) { 665 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 666 } 667 } 668 669 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 670 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); 671 } 672 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 673 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 674 675 #if defined(PETSC_USE_BOPT_g) 676 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 677 PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 678 baij->ht_total_ct = 0; 679 baij->ht_insert_ct = 0; 680 } 681 #endif 682 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 683 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 684 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 685 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 686 } 687 688 if (baij->rowvalues) { 689 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 690 baij->rowvalues = 0; 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket" 697 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 698 { 699 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 700 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 701 PetscTruth isascii,isdraw; 702 Viewer sviewer; 703 704 PetscFunctionBegin; 705 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 706 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 707 if (isascii) { 708 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 709 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 710 MatInfo info; 711 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 712 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 713 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 714 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 715 baij->bs,(int)info.memory);CHKERRQ(ierr); 716 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 717 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 718 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 719 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 720 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 721 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 724 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 } 728 729 if (isdraw) { 730 Draw draw; 731 PetscTruth isnull; 732 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 733 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 734 } 735 736 if (size == 1) { 737 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 738 } else { 739 /* assemble the entire matrix onto first processor. 
*/ 740 Mat A; 741 Mat_SeqSBAIJ *Aloc; 742 Mat_SeqBAIJ *Bloc; 743 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 744 MatScalar *a; 745 746 if (!rank) { 747 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 748 } else { 749 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 750 } 751 PLogObjectParent(mat,A); 752 753 /* copy over the A part */ 754 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 755 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 756 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 757 758 for (i=0; i<mbs; i++) { 759 rvals[0] = bs*(baij->rstart + i); 760 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 761 for (j=ai[i]; j<ai[i+1]; j++) { 762 col = (baij->cstart+aj[j])*bs; 763 for (k=0; k<bs; k++) { 764 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 765 col++; a += bs; 766 } 767 } 768 } 769 /* copy over the B part */ 770 Bloc = (Mat_SeqBAIJ*)baij->B->data; 771 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 772 for (i=0; i<mbs; i++) { 773 rvals[0] = bs*(baij->rstart + i); 774 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 775 for (j=ai[i]; j<ai[i+1]; j++) { 776 col = baij->garray[aj[j]]*bs; 777 for (k=0; k<bs; k++) { 778 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 779 col++; a += bs; 780 } 781 } 782 } 783 ierr = PetscFree(rvals);CHKERRQ(ierr); 784 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 785 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 786 /* 787 Everyone has to call to draw the matrix since the graphics waits are 788 synchronized across all processors that share the Draw object 789 */ 790 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 791 if (!rank) { 792 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 793 } 794 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 795 ierr = MatDestroy(A);CHKERRQ(ierr); 796 } 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNC__ 801 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ" 802 int MatView_MPISBAIJ(Mat mat,Viewer viewer) 803 { 804 int ierr; 805 PetscTruth isascii,isdraw,issocket,isbinary; 806 807 PetscFunctionBegin; 808 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 809 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 810 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 811 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 812 if (isascii || isdraw || issocket || isbinary) { 813 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 814 } else { 815 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNC__ 821 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ" 822 int MatDestroy_MPISBAIJ(Mat mat) 823 { 824 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 825 int ierr; 826 827 PetscFunctionBegin; 828 829 if (mat->mapping) { 830 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 831 } 832 if (mat->bmapping) { 833 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 834 } 835 if (mat->rmap) { 836 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 837 } 838 if (mat->cmap) { 839 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 840 } 841 #if defined(PETSC_USE_LOG) 842 
PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
PetscFunctionBegin; 930 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 931 /* PetscFunctionReturn(0); */ 932 } 933 934 #undef __FUNC__ 935 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 936 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 937 { 938 PetscFunctionBegin; 939 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 940 /* PetscFunctionReturn(0); */ 941 } 942 943 /* 944 This only works correctly for square matrices where the subblock A->A is the 945 diagonal block 946 */ 947 #undef __FUNC__ 948 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 949 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 950 { 951 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 952 int ierr; 953 954 PetscFunctionBegin; 955 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 956 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNC__ 961 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 962 int MatScale_MPISBAIJ(Scalar *aa,Mat A) 963 { 964 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 965 int ierr; 966 967 PetscFunctionBegin; 968 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 969 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 #undef __FUNC__ 974 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 975 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 976 { 977 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 978 979 PetscFunctionBegin; 980 if (m) *m = mat->rstart*mat->bs; 981 if (n) *n = mat->rend*mat->bs; 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNC__ 986 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 987 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 988 { 989 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 990 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 991 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 992 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 993 int *cmap,*idx_p,cstart = mat->cstart; 994 995 PetscFunctionBegin; 996 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 997 mat->getrowactive = PETSC_TRUE; 998 999 if (!mat->rowvalues && (idx || v)) { 1000 /* 1001 allocate enough space to hold information from the longest row. 
1002 */ 1003 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1004 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1005 int max = 1,mbs = mat->mbs,tmp; 1006 for (i=0; i<mbs; i++) { 1007 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1008 if (max < tmp) { max = tmp; } 1009 } 1010 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1011 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1012 } 1013 1014 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1015 lrow = row - brstart; /* local row index */ 1016 1017 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1018 if (!v) {pvA = 0; pvB = 0;} 1019 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1020 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1021 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1022 nztot = nzA + nzB; 1023 1024 cmap = mat->garray; 1025 if (v || idx) { 1026 if (nztot) { 1027 /* Sort by increasing column numbers, assuming A and B already sorted */ 1028 int imark = -1; 1029 if (v) { 1030 *v = v_p = mat->rowvalues; 1031 for (i=0; i<nzB; i++) { 1032 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1033 else break; 1034 } 1035 imark = i; 1036 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1037 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1038 } 1039 if (idx) { 1040 *idx = idx_p = mat->rowindices; 1041 if (imark > -1) { 1042 for (i=0; i<imark; i++) { 1043 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1044 } 1045 } else { 1046 for (i=0; i<nzB; i++) { 1047 if (cmap[cworkB[i]/bs] < cstart) 1048 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1049 else break; 1050 } 1051 imark = i; 1052 } 1053 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1054 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1055 } 1056 } else { 1057 if (idx) *idx = 0; 1058 if (v) *v = 0; 1059 } 1060 } 1061 *nz = nztot; 1062 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1063 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNC__ 1068 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1069 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1070 { 1071 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1072 1073 PetscFunctionBegin; 1074 if (baij->getrowactive == PETSC_FALSE) { 1075 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1076 } 1077 baij->getrowactive = PETSC_FALSE; 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNC__ 1082 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1083 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1084 { 1085 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1086 1087 PetscFunctionBegin; 1088 *bs = baij->bs; 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNC__ 1093 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1094 int MatZeroEntries_MPISBAIJ(Mat A) 1095 { 1096 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1097 int ierr; 1098 1099 PetscFunctionBegin; 1100 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1101 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNC__ 1106 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ" 1107 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1108 { 1109 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1110 Mat A = a->A,B = a->B; 1111 int ierr; 1112 PetscReal 
isend[5],irecv[5]; 1113 1114 PetscFunctionBegin; 1115 info->block_size = (double)a->bs; 1116 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1117 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1118 isend[3] = info->memory; isend[4] = info->mallocs; 1119 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1120 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1121 isend[3] += info->memory; isend[4] += info->mallocs; 1122 if (flag == MAT_LOCAL) { 1123 info->nz_used = isend[0]; 1124 info->nz_allocated = isend[1]; 1125 info->nz_unneeded = isend[2]; 1126 info->memory = isend[3]; 1127 info->mallocs = isend[4]; 1128 } else if (flag == MAT_GLOBAL_MAX) { 1129 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1130 info->nz_used = irecv[0]; 1131 info->nz_allocated = irecv[1]; 1132 info->nz_unneeded = irecv[2]; 1133 info->memory = irecv[3]; 1134 info->mallocs = irecv[4]; 1135 } else if (flag == MAT_GLOBAL_SUM) { 1136 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1137 info->nz_used = irecv[0]; 1138 info->nz_allocated = irecv[1]; 1139 info->nz_unneeded = irecv[2]; 1140 info->memory = irecv[3]; 1141 info->mallocs = irecv[4]; 1142 } else { 1143 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1144 } 1145 info->rows_global = (double)A->M; 1146 info->columns_global = (double)A->N; 1147 info->rows_local = (double)A->m; 1148 info->columns_local = (double)A->N; 1149 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1150 info->fill_ratio_needed = 0; 1151 info->factor_mallocs = 0; 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNC__ 1156 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1157 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1158 { 1159 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1160 int ierr; 1161 1162 PetscFunctionBegin; 1163 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1164 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1165 op == MAT_COLUMNS_UNSORTED || 1166 op == MAT_COLUMNS_SORTED || 1167 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1168 op == MAT_KEEP_ZEROED_ROWS || 1169 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1170 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1171 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1172 } else if (op == MAT_ROW_ORIENTED) { 1173 a->roworiented = PETSC_TRUE; 1174 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1175 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1176 } else if (op == MAT_ROWS_SORTED || 1177 op == MAT_ROWS_UNSORTED || 1178 op == MAT_SYMMETRIC || 1179 op == MAT_STRUCTURALLY_SYMMETRIC || 1180 op == MAT_YES_NEW_DIAGONALS || 1181 op == MAT_USE_HASH_TABLE) { 1182 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1183 } else if (op == MAT_COLUMN_ORIENTED) { 1184 a->roworiented = PETSC_FALSE; 1185 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1186 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1187 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1188 a->donotstash = PETSC_TRUE; 1189 } else if (op == MAT_NO_NEW_DIAGONALS) { 1190 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1191 } else if (op == MAT_USE_HASH_TABLE) { 1192 a->ht_flag = PETSC_TRUE; 1193 } else { 1194 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNC__ 1200 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1201 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1202 { 1203 PetscFunctionBegin; 1204 SETERRQ(1,"Matrix is symmetric. 
MatTranspose() should not be called"); 1205 /* PetscFunctionReturn(0); */ 1206 } 1207 1208 #undef __FUNC__ 1209 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1210 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1211 { 1212 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1213 Mat a = baij->A,b = baij->B; 1214 int ierr,s1,s2,s3; 1215 1216 PetscFunctionBegin; 1217 if (ll != rr) { 1218 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1219 } 1220 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1221 if (rr) { 1222 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1223 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1224 /* Overlap communication with computation. */ 1225 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1226 /*} if (ll) { */ 1227 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1228 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1229 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1230 /* } */ 1231 /* scale the diagonal block */ 1232 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1233 1234 /* if (rr) { */ 1235 /* Do a scatter end and then right scale the off-diagonal block */ 1236 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1237 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1238 } 1239 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNC__ 1244 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1245 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1246 { 1247 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1248 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1249 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1250 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1251 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1252 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1253 MPI_Comm comm = A->comm; 1254 MPI_Request *send_waits,*recv_waits; 1255 MPI_Status recv_status,*send_status; 1256 IS istmp; 1257 1258 PetscFunctionBegin; 1259 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1260 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1261 1262 /* first count number of contributors to each processor */ 1263 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1264 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1265 procs = nprocs + size; 1266 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1267 for (i=0; i<N; i++) { 1268 idx = rows[i]; 1269 found = 0; 1270 for (j=0; j<size; j++) { 1271 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1272 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1273 } 1274 } 1275 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1276 } 1277 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1278 1279 /* inform other processors of number of messages and max length*/ 1280 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1281 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1282 nmax = work[rank]; 1283 nrecvs = work[size+rank]; 1284 ierr = PetscFree(work);CHKERRQ(ierr); 1285 1286 /* post receives: */ 1287 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1288 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1289 for (i=0; i<nrecvs; i++) { 1290 
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory, thus calling lots of mallocs and slowing things down.
1357 1358 Contributed by: Mathew Knepley 1359 */ 1360 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1361 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1362 if (diag && (l->A->M == l->A->N)) { 1363 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1364 } else if (diag) { 1365 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1366 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1367 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1368 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1369 } 1370 for (i=0; i<slen; i++) { 1371 row = lrows[i] + rstart_bs; 1372 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1373 } 1374 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 } else { 1377 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1378 } 1379 1380 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1381 ierr = PetscFree(lrows);CHKERRQ(ierr); 1382 1383 /* wait on sends */ 1384 if (nsends) { 1385 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1386 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1387 ierr = PetscFree(send_status);CHKERRQ(ierr); 1388 } 1389 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1390 ierr = PetscFree(svalues);CHKERRQ(ierr); 1391 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNC__ 1396 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1397 int MatPrintHelp_MPISBAIJ(Mat A) 1398 { 1399 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1400 MPI_Comm comm = A->comm; 1401 static int called = 0; 1402 int ierr; 1403 1404 PetscFunctionBegin; 1405 if (!a->rank) { 1406 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1407 } 1408 if (called) {PetscFunctionReturn(0);} else called = 1; 1409 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1410 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNC__ 1415 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1416 int MatSetUnfactored_MPISBAIJ(Mat A) 1417 { 1418 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1427 1428 #undef __FUNC__ 1429 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1430 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1431 { 1432 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1433 Mat a,b,c,d; 1434 PetscTruth flg; 1435 int ierr; 1436 1437 PetscFunctionBegin; 1438 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1439 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1440 a = matA->A; b = matA->B; 1441 c = matB->A; d = matB->B; 1442 1443 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1444 if (flg == PETSC_TRUE) { 1445 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1446 } 1447 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 /* -------------------------------------------------------------------*/ 1452 static struct _MatOps MatOps_Values = { 1453 MatSetValues_MPISBAIJ, 1454 
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
       MatMultAdd_MPISBAIJ,
       MatMultTranspose_MPISBAIJ,
       MatMultTransposeAdd_MPISBAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPISBAIJ,
       MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
       MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
       MatZeroRows_MPISBAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetOwnershipRange_MPISBAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       0,
       MatPrintHelp_MPISBAIJ,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPISBAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
       0,
       0,
       0,
       MatGetMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetRowMax_MPISBAIJ};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatCreate_MPISBAIJ"
int MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;
  if (B->M != B->N || B->m != B->n){ /* N and n are not used after this */
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, set M=N and m=n");
  }

  B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
b->setvaluescopy = PETSC_NULL;
1590 #endif
1591
1592 /* stuff used in block assembly */
1593 b->barray = 0;
1594
1595 /* stuff used for matrix vector multiply */
1596 b->lvec = 0;
1597 b->Mvctx = 0;
1598
1599 /* stuff for MatGetRow() */
1600 b->rowindices = 0;
1601 b->rowvalues = 0;
1602 b->getrowactive = PETSC_FALSE;
1603
1604 /* hash table stuff */
1605 b->ht = 0;
1606 b->hd = 0;
1607 b->ht_size = 0;
1608 b->ht_flag = PETSC_FALSE;
1609 b->ht_fact = 0;
1610 b->ht_total_ct = 0;
1611 b->ht_insert_ct = 0;
1612
1613 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1614 if (flg) {
1615 double fact = 1.39;
1616 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1617 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1618 if (fact <= 1.0) fact = 1.39;
1619 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1620 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1621 }
1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1623 "MatStoreValues_MPISBAIJ",
1624 MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1626 "MatRetrieveValues_MPISBAIJ",
1627 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1629 "MatGetDiagonalBlock_MPISBAIJ",
1630 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1631 PetscFunctionReturn(0);
1632 }
1633
1634 #undef __FUNC__
1635 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetPreallocation"
1636 /*@C
1637 MatMPISBAIJSetPreallocation - For good matrix assembly performance
1638 the user should preallocate the matrix storage by setting the parameters
1639 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1640 performance can be increased by more than a factor of 50.
1641
1642 Collective on Mat
1643
1644 Input Parameters:
1645 + A - the matrix
1646 . bs - size of block
1647 . d_nz - number of block nonzeros per block row in diagonal portion of local
1648 submatrix (same for all local rows)
1649 . d_nnz - array containing the number of block nonzeros in the various block rows
1650 of the diagonal portion of the local submatrix (possibly different for each block
1651 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
1652 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1653 submatrix (same for all local rows).
1654 - o_nnz - array containing the number of block nonzeros in the various block rows of the
1655 off-diagonal portion of the local submatrix (possibly different for
1656 each block row) or PETSC_NULL.
1657
1658
1659 Options Database Keys:
1660 . -mat_no_unroll - uses code that does not unroll the loops in the
1661 block calculations (much slower)
1662 . -mat_block_size - size of the blocks to use
1663
1664 Notes:
1665
1666 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1667 then it must be used on all processors that share the object for that argument.
1668
1669 Storage Information:
1670 For a square global matrix we define each processor's diagonal portion
1671 to be its local rows and the corresponding columns (a square submatrix);
1672 each processor's off-diagonal portion encompasses the remainder of the
1673 local matrix (a rectangular submatrix).
1674 1675 The user can specify preallocated storage for the diagonal part of 1676 the local submatrix with either d_nz or d_nnz (not both). Set 1677 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1678 memory allocation. Likewise, specify preallocated storage for the 1679 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1680 1681 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1682 the figure below we depict these three local rows and all columns (0-11). 1683 1684 .vb 1685 0 1 2 3 4 5 6 7 8 9 10 11 1686 ------------------- 1687 row 3 | o o o d d d o o o o o o 1688 row 4 | o o o d d d o o o o o o 1689 row 5 | o o o d d d o o o o o o 1690 ------------------- 1691 .ve 1692 1693 Thus, any entries in the d locations are stored in the d (diagonal) 1694 submatrix, and any entries in the o locations are stored in the 1695 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1696 stored simply in the MATSEQBAIJ format for compressed row storage. 1697 1698 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1699 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1700 In general, for PDE problems in which most nonzeros are near the diagonal, 1701 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1702 or you will get TERRIBLE performance; see the users' manual chapter on 1703 matrices. 1704 1705 Level: intermediate 1706 1707 .keywords: matrix, block, aij, compressed row, sparse, parallel 1708 1709 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1710 @*/ 1711 1712 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1713 { 1714 Mat_MPISBAIJ *b; 1715 int ierr,i,mbs,Mbs=PETSC_DECIDE; 1716 1717 PetscFunctionBegin; 1718 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1719 1720 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1721 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz); 1722 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz); 1723 if (d_nnz) { 1724 for (i=0; i<B->m/bs; i++) { 1725 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1726 } 1727 } 1728 if (o_nnz) { 1729 for (i=0; i<B->m/bs; i++) { 1730 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1731 } 1732 } 1733 1734 b = (Mat_MPISBAIJ*)B->data; 1735 mbs = B->m/bs; 1736 Mbs = B->M/bs; 1737 if (mbs*bs != B->m) { 1738 SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows/cols must be divisible by blocksize"); 1739 } 1740 1741 b->bs = bs; 1742 b->bs2 = bs*bs; 1743 b->mbs = mbs; 1744 b->nbs = mbs; 1745 b->Mbs = Mbs; 1746 b->Nbs = Mbs; 1747 1748 b->cowners = b->rowners + b->size + 2; 1749 b->rowners_bs = b->cowners + b->size + 2; 1750 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1751 b->rowners[0] = 0; 1752 for (i=2; i<=b->size; i++) { 1753 b->rowners[i] += b->rowners[i-1]; 1754 } 1755 b->rstart = b->rowners[b->rank]; 1756 b->rend = b->rowners[b->rank+1]; 1757 b->cstart = b->rstart; 1758 b->cend = b->rend; 1759 for (i=0; i<=b->size; i++) { 1760 b->rowners_bs[i] = b->rowners[i]*bs; 1761 } 1762 b->rstart_bs = b->rstart * bs; 1763 b->rend_bs = b->rend * bs; 1764 1765 b->cstart_bs = b->cstart * bs; 
1766 b->cend_bs = b->cend * bs;
1767
1768
1769 if (d_nz == PETSC_DEFAULT) d_nz = 5;
1770 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1771 PLogObjectParent(B,b->A);
1772 if (o_nz == PETSC_DEFAULT) o_nz = 0;
1773 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1774 PLogObjectParent(B,b->B);
1775
1776 /* build cache for off array entries formed */
1777 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1778
1779 PetscFunctionReturn(0);
1780 }
1781
1782 #undef __FUNC__
1783 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
1784 /*@C
1785 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1786 (block compressed row). For good matrix assembly performance
1787 the user should preallocate the matrix storage by setting the parameters
1788 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1789 performance can be increased by more than a factor of 50.
1790
1791 Collective on MPI_Comm
1792
1793 Input Parameters:
1794 + comm - MPI communicator
1795 . bs - size of block
1796 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1797 This value should be the same as the local size used in creating the
1798 y vector for the matrix-vector product y = Ax.
1799 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1800 This value should be the same as the local size used in creating the
1801 x vector for the matrix-vector product y = Ax.
1802 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1803 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1804 . d_nz - number of block nonzeros per block row in diagonal portion of local
1805 submatrix (same for all local rows)
1806 . d_nnz - array containing the number of block nonzeros in the various block rows
1807 of the diagonal portion of the local submatrix (possibly different for each block
1808 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
1809 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1810 submatrix (same for all local rows).
1811 - o_nnz - array containing the number of block nonzeros in the various block rows of the
1812 off-diagonal portion of the local submatrix (possibly different for
1813 each block row) or PETSC_NULL.
1814
1815 Output Parameter:
1816 . A - the matrix
1817
1818 Options Database Keys:
1819 . -mat_no_unroll - uses code that does not unroll the loops in the
1820 block calculations (much slower)
1821 . -mat_block_size - size of the blocks to use
1822 . -mat_mpi - use the parallel matrix data structures even on one processor
1823 (defaults to using SeqSBAIJ format on one processor)
1824
1825 Notes:
1826 The user MUST specify either the local or global matrix dimensions
1827 (possibly both).
1828
1829 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1830 then it must be used on all processors that share the object for that argument.
1831
1832 Storage Information:
1833 For a square global matrix we define each processor's diagonal portion
1834 to be its local rows and the corresponding columns (a square submatrix);
1835 each processor's off-diagonal portion encompasses the remainder of the
1836 local matrix (a rectangular submatrix).
1837 1838 The user can specify preallocated storage for the diagonal part of 1839 the local submatrix with either d_nz or d_nnz (not both). Set 1840 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1841 memory allocation. Likewise, specify preallocated storage for the 1842 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1843 1844 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1845 the figure below we depict these three local rows and all columns (0-11). 1846 1847 .vb 1848 0 1 2 3 4 5 6 7 8 9 10 11 1849 ------------------- 1850 row 3 | o o o d d d o o o o o o 1851 row 4 | o o o d d d o o o o o o 1852 row 5 | o o o d d d o o o o o o 1853 ------------------- 1854 .ve 1855 1856 Thus, any entries in the d locations are stored in the d (diagonal) 1857 submatrix, and any entries in the o locations are stored in the 1858 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1859 stored simply in the MATSEQBAIJ format for compressed row storage. 1860 1861 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1862 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1863 In general, for PDE problems in which most nonzeros are near the diagonal, 1864 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1865 or you will get TERRIBLE performance; see the users' manual chapter on 1866 matrices. 1867 1868 Level: intermediate 1869 1870 .keywords: matrix, block, aij, compressed row, sparse, parallel 1871 1872 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1873 @*/ 1874 1875 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1876 { 1877 int ierr; 1878 1879 PetscFunctionBegin; 1880 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1881 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 1882 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 1887 #undef __FUNC__ 1888 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 1889 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1890 { 1891 Mat mat; 1892 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1893 int ierr,len=0; 1894 1895 PetscFunctionBegin; 1896 *newmat = 0; 1897 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1898 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 1899 a = (Mat_MPISBAIJ*)mat->data; 1900 a->bs = oldmat->bs; 1901 a->bs2 = oldmat->bs2; 1902 a->mbs = oldmat->mbs; 1903 a->nbs = oldmat->nbs; 1904 a->Mbs = oldmat->Mbs; 1905 a->Nbs = oldmat->Nbs; 1906 1907 a->rstart = oldmat->rstart; 1908 a->rend = oldmat->rend; 1909 a->cstart = oldmat->cstart; 1910 a->cend = oldmat->cend; 1911 a->size = oldmat->size; 1912 a->rank = oldmat->rank; 1913 a->donotstash = oldmat->donotstash; 1914 a->roworiented = oldmat->roworiented; 1915 a->rowindices = 0; 1916 a->rowvalues = 0; 1917 a->getrowactive = PETSC_FALSE; 1918 a->barray = 0; 1919 a->rstart_bs = oldmat->rstart_bs; 1920 a->rend_bs = oldmat->rend_bs; 1921 a->cstart_bs = oldmat->cstart_bs; 1922 a->cend_bs = oldmat->cend_bs; 1923 1924 /* hash table stuff */ 1925 a->ht = 0; 1926 a->hd = 0; 1927 a->ht_size = 0; 1928 a->ht_flag = oldmat->ht_flag; 1929 a->ht_fact = oldmat->ht_fact; 1930 a->ht_total_ct = 0; 1931 a->ht_insert_ct = 0; 1932 1933 a->rowners = 
(int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1934 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1935 a->cowners = a->rowners + a->size + 2; 1936 a->rowners_bs = a->cowners + a->size + 2; 1937 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1938 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1939 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1940 if (oldmat->colmap) { 1941 #if defined (PETSC_USE_CTABLE) 1942 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1943 #else 1944 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1945 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1946 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 1947 #endif 1948 } else a->colmap = 0; 1949 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1950 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 1951 PLogObjectMemory(mat,len*sizeof(int)); 1952 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 1953 } else a->garray = 0; 1954 1955 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1956 PLogObjectParent(mat,a->lvec); 1957 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1958 1959 PLogObjectParent(mat,a->Mvctx); 1960 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1961 PLogObjectParent(mat,a->A); 1962 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1963 PLogObjectParent(mat,a->B); 1964 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 1965 *newmat = mat; 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #include "petscsys.h" 1970 1971 #undef __FUNC__ 1972 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 1973 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 1974 { 1975 Mat A; 1976 int i,nz,ierr,j,rstart,rend,fd; 1977 Scalar *vals,*buf; 1978 MPI_Comm comm = ((PetscObject)viewer)->comm; 1979 MPI_Status status; 1980 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1981 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 1982 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 1983 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1984 int dcount,kmax,k,nzcount,tmp; 1985 1986 PetscFunctionBegin; 1987 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1988 1989 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1990 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1991 if (!rank) { 1992 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1993 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1994 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1995 if (header[3] < 0) { 1996 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 1997 } 1998 } 1999 2000 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2001 M = header[1]; N = header[2]; 2002 2003 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2004 2005 /* 2006 This code adds extra rows to make sure the number of rows is 2007 divisible by the blocksize 2008 */ 2009 Mbs = M/bs; 2010 extra_rows = bs - M + bs*(Mbs); 2011 if (extra_rows == bs) extra_rows = 0; 2012 else Mbs++; 2013 if (extra_rows &&!rank) { 2014 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2015 } 
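  /* worked example of the padding arithmetic above (illustrative values): with bs=3 and
     M=10, Mbs starts as 3 and extra_rows = 3 - 10 + 9 = 2, so Mbs is bumped to 4 and the
     matrix is treated as having 12 rows; when M is already divisible by bs the formula
     yields extra_rows = bs, which is reset to 0 */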
2016 2017 /* determine ownership of all rows */ 2018 mbs = Mbs/size + ((Mbs % size) > rank); 2019 m = mbs * bs; 2020 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2021 browners = rowners + size + 1; 2022 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2023 rowners[0] = 0; 2024 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2025 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2026 rstart = rowners[rank]; 2027 rend = rowners[rank+1]; 2028 2029 /* distribute row lengths to all processors */ 2030 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2031 if (!rank) { 2032 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2033 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2034 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2035 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2036 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2037 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2038 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2039 } else { 2040 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2041 } 2042 2043 if (!rank) { /* procs[0] */ 2044 /* calculate the number of nonzeros on each processor */ 2045 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2046 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2047 for (i=0; i<size; i++) { 2048 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2049 procsnz[i] += rowlengths[j]; 2050 } 2051 } 2052 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2053 2054 /* determine max buffer needed and allocate it */ 2055 maxnz = 0; 2056 for (i=0; i<size; i++) { 2057 maxnz = PetscMax(maxnz,procsnz[i]); 2058 } 2059 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2060 2061 /* read in my part of the matrix column indices */ 2062 nz = procsnz[0]; 2063 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2064 mycols = ibuf; 2065 if (size == 1) nz -= extra_rows; 2066 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2067 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2068 2069 /* read in every ones (except the last) and ship off */ 2070 for (i=1; i<size-1; i++) { 2071 nz = procsnz[i]; 2072 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2073 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2074 } 2075 /* read in the stuff for the last proc */ 2076 if (size != 1) { 2077 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2078 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2079 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2080 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2081 } 2082 ierr = PetscFree(cols);CHKERRQ(ierr); 2083 } else { /* procs[i], i>0 */ 2084 /* determine buffer space needed for message */ 2085 nz = 0; 2086 for (i=0; i<m; i++) { 2087 nz += locrowlens[i]; 2088 } 2089 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2090 mycols = ibuf; 2091 /* receive message of column indices*/ 2092 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2093 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2094 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2095 } 2096 2097 /* loop over local rows, determining number of off diagonal entries */ 2098 dlens = 
(int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2099 odlens = dlens + (rend-rstart); 2100 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2101 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2102 masked1 = mask + Mbs; 2103 masked2 = masked1 + Mbs; 2104 rowcount = 0; nzcount = 0; 2105 for (i=0; i<mbs; i++) { 2106 dcount = 0; 2107 odcount = 0; 2108 for (j=0; j<bs; j++) { 2109 kmax = locrowlens[rowcount]; 2110 for (k=0; k<kmax; k++) { 2111 tmp = mycols[nzcount++]/bs; /* block col. index */ 2112 if (!mask[tmp]) { 2113 mask[tmp] = 1; 2114 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2115 else masked1[dcount++] = tmp; /* entry in diag portion */ 2116 } 2117 } 2118 rowcount++; 2119 } 2120 2121 dlens[i] = dcount; /* d_nzz[i] */ 2122 odlens[i] = odcount; /* o_nzz[i] */ 2123 2124 /* zero out the mask elements we set */ 2125 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2126 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2127 } 2128 2129 /* create our matrix */ 2130 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2131 CHKERRQ(ierr); 2132 A = *newmat; 2133 MatSetOption(A,MAT_COLUMNS_SORTED); 2134 2135 if (!rank) { 2136 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2137 /* read in my part of the matrix numerical values */ 2138 nz = procsnz[0]; 2139 vals = buf; 2140 mycols = ibuf; 2141 if (size == 1) nz -= extra_rows; 2142 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2143 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2144 2145 /* insert into matrix */ 2146 jj = rstart*bs; 2147 for (i=0; i<m; i++) { 2148 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2149 mycols += locrowlens[i]; 2150 vals += locrowlens[i]; 2151 jj++; 2152 } 2153 2154 /* read in other processors (except the last one) and ship out */ 2155 for (i=1; i<size-1; i++) { 2156 nz = procsnz[i]; 2157 vals = buf; 2158 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2159 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2160 } 2161 /* the last proc */ 2162 if (size != 1){ 2163 nz = procsnz[i] - extra_rows; 2164 vals = buf; 2165 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2166 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2167 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2168 } 2169 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2170 2171 } else { 2172 /* receive numeric values */ 2173 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2174 2175 /* receive message of values*/ 2176 vals = buf; 2177 mycols = ibuf; 2178 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2179 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2180 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2181 2182 /* insert into matrix */ 2183 jj = rstart*bs; 2184 for (i=0; i<m; i++) { 2185 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2186 mycols += locrowlens[i]; 2187 vals += locrowlens[i]; 2188 jj++; 2189 } 2190 } 2191 2192 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2193 ierr = PetscFree(buf);CHKERRQ(ierr); 2194 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2195 ierr = PetscFree(rowners);CHKERRQ(ierr); 2196 ierr = PetscFree(dlens);CHKERRQ(ierr); 2197 ierr = PetscFree(mask);CHKERRQ(ierr); 2198 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 
2199 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNC__ 2204 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2205 /*@ 2206 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2207 2208 Input Parameters: 2209 . mat - the matrix 2210 . fact - factor 2211 2212 Collective on Mat 2213 2214 Level: advanced 2215 2216 Notes: 2217 This can also be set by the command line option: -mat_use_hash_table fact 2218 2219 .keywords: matrix, hashtable, factor, HT 2220 2221 .seealso: MatSetOption() 2222 @*/ 2223 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2224 { 2225 PetscFunctionBegin; 2226 SETERRQ(1,"Function not yet written for SBAIJ format"); 2227 /* PetscFunctionReturn(0); */ 2228 } 2229 2230 #undef __FUNC__ 2231 #define __FUNC__ /*<a name=""></a>*/"MatGetRowMax_MPISBAIJ" 2232 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2233 { 2234 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2235 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2236 PetscReal atmp; 2237 double *work,*svalues,*rvalues; 2238 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2239 int rank,size,*rowners_bs,dest,count,source; 2240 Scalar *ba,*va; 2241 MPI_Status stat; 2242 2243 PetscFunctionBegin; 2244 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2245 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2246 2247 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRA(ierr); 2248 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRA(ierr); 2249 2250 bs = a->bs; 2251 mbs = a->mbs; 2252 Mbs = a->Mbs; 2253 ba = b->a; 2254 bi = b->i; 2255 bj = b->j; 2256 /* 2257 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2258 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2259 */ 2260 2261 /* find ownerships */ 2262 rowners_bs = a->rowners_bs; 2263 /* 2264 for (i=0; i<size+1; i++) { 2265 PetscPrintf(PETSC_COMM_SELF,"rowners_bs: %d\n",i,rowners_bs[i]); 2266 } 2267 */ 2268 2269 /* each proc creates an array to be distributed */ 2270 work = (PetscReal*)PetscMalloc(bs*Mbs*sizeof(PetscReal));CHKPTRQ(work); 2271 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2272 2273 /* row_max for B */ 2274 for (i=0; i<mbs; i++) { 2275 ncols = bi[1] - bi[0]; bi++; 2276 brow = bs*i; 2277 for (j=0; j<ncols; j++){ 2278 bcol = bs*(*bj); 2279 for (kcol=0; kcol<bs; kcol++){ 2280 col = bcol + kcol; /* local col index */ 2281 col += rowners_bs[rank] + bs; /* global col index */ 2282 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2283 for (krow=0; krow<bs; krow++){ 2284 atmp = PetscAbsScalar(*ba); ba++; 2285 row = brow + krow; /* local row index */ 2286 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2287 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2288 if (work[col] < atmp) work[col] = atmp; 2289 } 2290 } 2291 bj++; 2292 } 2293 } 2294 2295 /* send values to its owners */ 2296 if (rank != size-1){ 2297 for (dest=rank+1; dest<size; dest++){ 2298 svalues = work + rowners_bs[dest]; 2299 count = rowners_bs[dest+1]-rowners_bs[dest]; 2300 MPI_Send(svalues,count,MPI_DOUBLE_PRECISION,dest,rank,PETSC_COMM_WORLD); 2301 /* 2302 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]\n",rank,count,dest); 2303 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2304 */ 2305 } 2306 } 2307 2308 /* receive values */ 2309 if (rank){ 2310 rvalues = work; 2311 count = rowners_bs[rank+1]-rowners_bs[rank]; 2312 for (source=0; source<rank; source++){ 2313 
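      /* one message of length count arrives from every lower-ranked process; MPI_ANY_SOURCE
         is safe here because only the pointwise maximum of the received values is kept,
         so the order in which the messages arrive does not matter */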
ierr = MPI_Recv(rvalues,count,MPI_DOUBLE_PRECISION,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr);
2314 /* process values */
2315 for (i=0; i<count; i++){
2316 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2317 }
2318 /*
2319 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received from [%d] \n",rank,stat.MPI_SOURCE);
2320 PetscSynchronizedFlush(PETSC_COMM_WORLD);
2321 */
2322 }
2323 }
2324
2325 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2326 ierr = PetscFree(work);CHKERRQ(ierr);
2327 PetscFunctionReturn(0);
2328 }
2329
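/*
   Illustrative usage sketch (excluded from compilation with #if 0): one way a caller
   might create and assemble a MATMPISBAIJ matrix using the routines defined above.
   The routine name MatMPISBAIJUsageExample, the sizes, and the preallocation values
   are hypothetical; for the symmetric SBAIJ format only entries on or above the
   (block) diagonal need to be inserted.
*/
#if 0
int MatMPISBAIJUsageExample(MPI_Comm comm)
{
  Mat    C;
  int    ierr,bs = 2,m = 4,row,col,rstart,rend;
  Scalar v;

  PetscFunctionBegin;
  /* four local rows per process, block size 2; d_nz = 2 and o_nz = 1 block
     nonzeros per block row are preallocated (assumed values, not tuned) */
  ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,2,PETSC_NULL,1,PETSC_NULL,&C);CHKERRQ(ierr);

  /* insert a diagonal entry and one superdiagonal entry per locally owned row;
     lower-triangular entries are implied by symmetry and are not set */
  ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    v = 4.0; col = row;
    ierr = MatSetValues(C,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
    if (row+1 < rend) {
      v = -1.0; col = row+1;
      ierr = MatSetValues(C,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif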