/*$Id: mpibaij.c,v 1.208 2001/01/15 21:45:57 bsmith Exp balay $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "src/vec/vecimpl.h"

EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN int DisAssemble_MPIBAIJ(Mat);
EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatPrintHelp_SeqBAIJ(Mat);
EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*);

/*  UGLY, ugly, ugly
   When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix entries that are already in single
   precision and inserts them directly into the single precision data structures, while
   MatSetValuesBlocked_MPIBAIJ() first converts the entries to single precision and then
   calls ..._MatScalar() to insert them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNC__
#define __FUNC__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,i;
  Scalar      *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
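/*
   Illustrative sketch (an assumption about typical usage, not code exercised in
   this file): the store/retrieve pair above lets an application save the
   numerical values of a matrix whose nonzero pattern is fixed and restore them
   later, e.g. to reuse a constant (linear) part of a Jacobian across nonlinear
   iterations.
*/
#if 0
{
  Mat J;                                      /* an assembled MPIBAIJ matrix */
  int ierr;
  /* forbid new nonzeros so the stored pattern stays valid */
  ierr = MatSetOption(J,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
  ierr = MatStoreValues(J);CHKERRQ(ierr);     /* save the linear part */
  /* ... later, in each nonlinear iteration ... */
  ierr = MatRetrieveValues(J);CHKERRQ(ierr);  /* restore the saved values */
  /* add the nonlinear contributions with ADD_VALUES, then reassemble */
}
#endif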
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
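/*
   Worked example for the loop above (hypothetical data): with bs = 2 and
   garray = {3,7}, it sets colmap[3] = 0*2+1 = 1 and colmap[7] = 1*2+1 = 3;
   every other entry stays 0.  For a global block column g present on this
   process, colmap[g]-1 is the first local point column of that block in B;
   colmap[g] == 0 means the block column is absent here (the +1 shift keeps
   0 free as the "not here" marker).
*/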
#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1) goto a_noinsert; \
  else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
    MatScalar *new_a; \
 \
    if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (int*)(new_a + bs2*new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
    ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                       aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
    ierr = PetscFree(a->a);CHKERRQ(ierr); \
    if (!a->singlemalloc) { \
      ierr = PetscFree(a->i);CHKERRQ(ierr); \
      ierr = PetscFree(a->j);CHKERRQ(ierr); \
    } \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = PETSC_TRUE; \
 \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
    PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
    a->maxnz += bs2*CHUNKSIZE; \
    a->reallocs++; \
    a->nz++; \
  } \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  a_noinsert:; \
  ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1) goto b_noinsert; \
  else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
    MatScalar *new_a; \
 \
    if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (int*)(new_a + bs2*new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
    for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
    ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                       ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
    ierr = PetscFree(b->a);CHKERRQ(ierr); \
    if (!b->singlemalloc) { \
      ierr = PetscFree(b->i);CHKERRQ(ierr); \
      ierr = PetscFree(b->j);CHKERRQ(ierr); \
    } \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = PETSC_TRUE; \
 \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
    PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
    b->maxnz += bs2*CHUNKSIZE; \
    b->reallocs++; \
    b->nz++; \
  } \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}
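/*
   Worked example of the reallocation path in the macros above (hypothetical
   sizes): if block row brow already holds nrow = rmax = 4 blocks and a 5th
   block column is inserted, new_nz = ai[mbs] + CHUNKSIZE block slots are
   allocated in a single malloc (values, column indices, and row offsets
   together), the row offsets of every row past brow are shifted up by
   CHUNKSIZE = 10, and rmax grows to 14, so the next 10 insertions into this
   block row require no further copying.
*/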
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
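/*
   Illustrative sketch (an assumption about the standard user-level calling
   sequence, not code from this file): rows owned by other processes are
   accepted by the routine above and routed through mat->stash; the actual
   communication happens during the assembly calls.
*/
#if 0
{
  Mat    A;                     /* a created MPIBAIJ matrix */
  int    ierr,row = 0,col = 0;  /* may be owned by another process */
  Scalar v = 1.0;
  ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif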
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
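/*
   Worked example of HASH() above: with size = 7 and key = 10,
   tmp = 10*0.6180339887 = 6.180..., the fractional part is 0.180...,
   and the slot is (int)(7*0.180...) = 1.  Multiplying by the golden ratio
   conjugate and keeping the fractional part spreads consecutive keys
   roughly uniformly over [0,size) (Knuth's multiplicative hashing).
*/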
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
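/*
   The Frobenius norm computed above is
       ||A||_F = sqrt( sum_ij |a_ij|^2 ),
   formed from the local sums over the diagonal (A) and off-diagonal (B)
   blocks and completed with a global MPI_SUM reduction.
*/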
/*
     Creates the hash table, and sets its entries.
     This table is created only once; if new entries need to be added to the
   matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for the Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j == 0) ? 0.0 : ((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
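/*
   Note on the MPI_BOR reduction above (assuming INSERT_VALUES and ADD_VALUES
   occupy distinct bits, as in this version of PETSc): if every process uses
   the same insert mode the bitwise OR reproduces that mode, while any mix
   yields ADD_VALUES|INSERT_VALUES, which is caught as an error.
*/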
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, and restore the original flags
       after the MatSetValues() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth  isascii,isdraw;
  PetscViewer sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
                                                rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
                                                baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
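/*
   Note on the structure of the multiply routines below: with this process's
   rows split into the diagonal block A and off-diagonal block B, they compute
       y = A*x_local + B*x_ghost,
   starting the scatter that gathers the ghost values of x into a->lvec,
   overlapping it with the diagonal-block product, and applying B once the
   scatter has completed.
*/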
#undef __FUNC__
#define __FUNC__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTranspose_MPIBAIJ"
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransposeAdd_MPIBAIJ"
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart_bs;
  if (n) *n = mat->rend_bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar      *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
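/*
   Illustrative sketch (an assumption about the standard usage, not code from
   this file): MatGetRow() above only serves locally owned rows, so a typical
   traversal queries the ownership range first and restores each row before
   taking the next.
*/
#if 0
{
  Mat    A;                 /* an assembled MPIBAIJ matrix */
  int    ierr,row,rstart,rend,nz,*cols;
  Scalar *vals;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... use nz, cols[], vals[] ... */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
}
#endif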
#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (double)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_KEEP_ZEROED_ROWS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = PETSC_TRUE;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  } else if (op == MAT_USE_HASH_TABLE) {
    a->ht_flag = PETSC_TRUE;
  } else {
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
*/ 1634 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1635 } 1636 if (ll) { 1637 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1638 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1639 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1640 } 1641 /* scale the diagonal block */ 1642 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1643 1644 if (rr) { 1645 /* Do a scatter end and then right scale the off-diagonal block */ 1646 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1647 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1648 } 1649 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNC__ 1654 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1655 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1656 { 1657 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1658 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1659 int *procs,*nprocs,j,idx,nsends,*work,row; 1660 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1661 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1662 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1663 MPI_Comm comm = A->comm; 1664 MPI_Request *send_waits,*recv_waits; 1665 MPI_Status recv_status,*send_status; 1666 IS istmp; 1667 PetscTruth found; 1668 1669 PetscFunctionBegin; 1670 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1671 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1672 1673 /* first count number of contributors to each processor */ 1674 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1675 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1676 procs = nprocs + size; 1677 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1678 for (i=0; i<N; i++) { 1679 idx = rows[i]; 1680 found = PETSC_FALSE; 1681 for (j=0; j<size; j++) { 1682 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1683 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1684 } 1685 } 1686 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1687 } 1688 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1689 1690 /* inform other processors of number of messages and max length*/ 1691 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1692 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1693 nmax = work[rank]; 1694 nrecvs = work[size+rank]; 1695 ierr = PetscFree(work);CHKERRQ(ierr); 1696 1697 /* post receives: */ 1698 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1699 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1700 for (i=0; i<nrecvs; i++) { 1701 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1702 } 1703 1704 /* do sends: 1705 1) starts[i] gives the starting index in svalues for stuff going to 1706 the ith processor 1707 */ 1708 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1709 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1710 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1711 starts[0] = 0; 1712 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1713 for (i=0; i<N; i++) { 1714 svalues[starts[owner[i]]++] = rows[i]; 1715 } 1716 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1717 1718 starts[0] = 0; 1719 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1720 count = 0; 1721 
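  /* post one send per destination processor flagged in procs[] */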
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__  
#define __FUNC__ "MatEqual_MPIBAIJ"
int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat         a,b,c,d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetUpPreallocation_MPIBAIJ"
int MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTranspose_MPIBAIJ,
  MatMultTransposeAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPIBAIJ,
  0,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  MatDestroy_MPIBAIJ,
  MatView_MPIBAIJ,
  MatGetMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPIBAIJ};

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatCreate_MPIBAIJ"
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  int         ierr,size;
  PetscTruth  flg;

  PetscFunctionBegin;

  ierr    = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
  ierr    = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping   = 0;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->lensetvalues  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__  
#define __FUNC__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix 
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local 
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
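   Example usage (a sketch only; the block size 3 and the preallocation
   counts 5 and 2 are illustrative, not recommendations):
.vb
      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);
      ierr = MatSetType(A,MATMPIBAIJ);
      ierr = MatMPIBAIJSetPreallocation(A,3,5,PETSC_NULL,2,PETSC_NULL);
.ve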
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIBAIJ *b;
  int         ierr,i;
  PetscTruth  flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);

  B->preallocated = PETSC_TRUE;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = B->m/bs;
  b->nbs = B->n/bs;
  b->Mbs = B->M/bs;
  b->Nbs = B->N/bs;

  ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];

  ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the 
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the 
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local 
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows 
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDuplicate_MPIBAIJ"
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  a = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);

  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatLoad_MPIBAIJ"
int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /* 
     This code adds extra rows to make sure the number of rows is 
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr     = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for every other processor (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr   = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens = dlens + (rend-rstart);
  ierr   = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(Scalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(Scalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__  
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;
  int         ierr;
  PetscTruth  flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
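/*
   Example usage (a minimal sketch; the factor 1.6 is illustrative only):

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);

   or, equivalently, from the command line:  -mat_use_hash_table 1.6
*/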