1 /*$Id: mpibaij.c,v 1.217 2001/03/22 20:30:21 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN int DisAssemble_MPIBAIJ(Mat); 8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 
*/
#if defined(PETSC_USE_MAT_SINGLE)
/* Storage is single precision: the ..._MatScalar() routines take MatScalar input directly */
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
/* MatScalar == Scalar: the ..._MatScalar() names are plain aliases, no conversion needed */
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNC__
#define __FUNC__ "MatGetRowMax_MPIBAIJ"
/*
   MatGetRowMax_MPIBAIJ - For each local row, produce the entry of largest
   absolute value, combining the diagonal (a->A) and off-diagonal (a->B)
   sequential blocks of the parallel matrix.
*/
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,i;
  Scalar      *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;

  /* row maxima of the diagonal portion go directly into the result vector */
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  /* row maxima of the off-diagonal portion computed into a local work vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* keep, per row, whichever candidate has the larger absolute value */
  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   MatStoreValues_MPIBAIJ - Saves a copy of the current numerical values by
   delegating to MatStoreValues() on both sequential blocks.
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNC__ 85 #define __FUNC__ "MatRetrieveValues_MPIBAIJ" 86 int MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 int ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNC__ 105 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 106 static int CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 int nbs = B->nbs,i,bs=B->bs,ierr; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 120 PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; 
_i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (int*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = 
aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (int*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ 
\ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNC__ 284 #define __FUNC__ "MatSetValues_MPIBAIJ" 285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 int ierr,i,N = m*n; 289 MatScalar *vsingle; 290 291 
PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 294 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 299 for (i=0; i<N; i++) { 300 vsingle[i] = v[i]; 301 } 302 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNC__ 307 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 int ierr,i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 if (N > b->setvalueslen) { 316 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 317 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 318 b->setvalueslen = N; 319 } 320 vsingle = b->setvaluescopy; 321 for (i=0; i<N; i++) { 322 vsingle[i] = v[i]; 323 } 324 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNC__ 329 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 331 { 332 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 333 int ierr,i,N = m*n; 334 MatScalar *vsingle; 335 336 PetscFunctionBegin; 337 if (N > b->setvalueslen) { 338 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 339 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 340 b->setvalueslen = N; 341 } 342 vsingle = b->setvaluescopy; 343 for (i=0; i<N; i++) { 344 vsingle[i] = v[i]; 345 } 346 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNC__ 351 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 352 int 
MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 353 { 354 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 355 int ierr,i,N = m*n*b->bs2; 356 MatScalar *vsingle; 357 358 PetscFunctionBegin; 359 if (N > b->setvalueslen) { 360 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 361 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 362 b->setvalueslen = N; 363 } 364 vsingle = b->setvaluescopy; 365 for (i=0; i<N; i++) { 366 vsingle[i] = v[i]; 367 } 368 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 #endif 372 373 #undef __FUNC__ 374 #define __FUNC__ "MatSetValues_MPIBAIJ" 375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 376 { 377 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 378 MatScalar value; 379 PetscTruth roworiented = baij->roworiented; 380 int ierr,i,j,row,col; 381 int rstart_orig=baij->rstart_bs; 382 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 383 int cend_orig=baij->cend_bs,bs=baij->bs; 384 385 /* Some Variables required in the macro */ 386 Mat A = baij->A; 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 388 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 389 MatScalar *aa=a->a; 390 391 Mat B = baij->B; 392 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 393 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 394 MatScalar *ba=b->a; 395 396 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 397 int low,high,t,ridx,cidx,bs2=a->bs2; 398 MatScalar *ap,*bap; 399 400 PetscFunctionBegin; 401 for (i=0; i<m; i++) { 402 if (im[i] < 0) continue; 403 #if defined(PETSC_USE_BOPT_g) 404 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 405 #endif 406 if (im[i] >= rstart_orig && im[i] < rend_orig) { 407 row = im[i] - rstart_orig; 408 for (j=0; j<n; j++) { 409 if (in[j] >= cstart_orig && in[j] < cend_orig){ 410 col = in[j] - cstart_orig; 
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column lives in the off-diagonal block; translate global -> local column */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            /* a column not yet present forces a disassemble so B can change structure */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another processor: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Block-wise insertion: im[]/in[]
   are block indices and v supplies bs x bs blocks.  Blocks are gathered
   into the contiguous work buffer baij->barray before being handed to the
   sequential routines; off-processor blocks are stashed.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal block: translate global -> local block column */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hash on the golden ratio; tmp is a PetscReal scratch variable */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - Point-wise insertion using the hash
   table built by MatCreateHashTable_MPIBAIJ_Private(); only entries already
   present in the nonzero structure can be set.
*/
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* debug build also counts probes for the assembly-time statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probing, wrapping around the table once */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - Block-wise insertion through
   the hash table; HD[idx] points directly at the stored bs x bs block.
*/
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* row-oriented input is transposed into the column-oriented block */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves entries by global (point) indices.
   Only locally owned rows are supported; absent off-diagonal entries
   are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap miss (or stale slot) means the entry is structurally zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Frobenius norm of the parallel matrix: local sums of
   squares over both blocks, reduced with MPI_Allreduce.  Other norm types
   are not supported in parallel.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
847 */ 848 #undef __FUNC__ 849 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 857 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858 int *HT,key; 859 MatScalar **HD; 860 PetscReal tmp; 861 #if defined(PETSC_USE_BOPT_g) 862 int ct=0,max=0; 863 #endif 864 865 PetscFunctionBegin; 866 baij->ht_size=(int)(factor*nz); 867 size = baij->ht_size; 868 869 if (baij->ht) { 870 PetscFunctionReturn(0); 871 } 872 873 /* Allocate Memory for Hash Table */ 874 ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875 baij->ht = (int*)(baij->hd + size); 876 HD = baij->hd; 877 HT = baij->ht; 878 879 880 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 881 882 883 /* Loop Over A */ 884 for (i=0; i<a->mbs; i++) { 885 for (j=ai[i]; j<ai[i+1]; j++) { 886 row = i+rstart; 887 col = aj[j]+cstart; 888 889 key = row*Nbs + col + 1; 890 h1 = HASH(size,key,tmp); 891 for (k=0; k<size; k++){ 892 if (HT[(h1+k)%size] == 0.0) { 893 HT[(h1+k)%size] = key; 894 HD[(h1+k)%size] = a->a + j*bs2; 895 break; 896 #if defined(PETSC_USE_BOPT_g) 897 } else { 898 ct++; 899 #endif 900 } 901 } 902 #if defined(PETSC_USE_BOPT_g) 903 if (k> max) max = k; 904 #endif 905 } 906 } 907 /* Loop Over B */ 908 for (i=0; i<b->mbs; i++) { 909 for (j=bi[i]; j<bi[i+1]; j++) { 910 row = i+rstart; 911 col = garray[bj[j]]; 912 key = row*Nbs + col + 1; 913 h1 = HASH(size,key,tmp); 914 for (k=0; k<size; k++){ 915 if (HT[(h1+k)%size] == 0.0) { 916 HT[(h1+k)%size] = key; 917 HD[(h1+k)%size] = b->a + j*bs2; 918 break; 919 #if defined(PETSC_USE_BOPT_g) 920 } else { 921 ct++; 922 #endif 923 } 924 } 925 #if 
defined(PETSC_USE_BOPT_g) 926 if (k> max) max = k; 927 #endif 928 } 929 } 930 931 /* Print Summary */ 932 #if defined(PETSC_USE_BOPT_g) 933 for (i=0,j=0; i<size; i++) { 934 if (HT[i]) {j++;} 935 } 936 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 937 #endif 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNC__ 942 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944 { 945 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946 int ierr,nstash,reallocs; 947 InsertMode addv; 948 949 PetscFunctionBegin; 950 if (baij->donotstash) { 951 PetscFunctionReturn(0); 952 } 953 954 /* make sure all processors are either in INSERTMODE or ADDMODE */ 955 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 956 if (addv == (ADD_VALUES|INSERT_VALUES)) { 957 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 958 } 959 mat->insertmode = addv; /* in case this processor had no cache */ 960 961 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 962 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 963 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 964 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 965 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 966 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 967 PetscFunctionReturn(0); 968 } 969 970 #undef __FUNC__ 971 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 972 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 973 { 974 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 975 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 976 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 977 int 
*row,*col,other_disassembled; 978 PetscTruth r1,r2,r3; 979 MatScalar *val; 980 InsertMode addv = mat->insertmode; 981 982 PetscFunctionBegin; 983 if (!baij->donotstash) { 984 while (1) { 985 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986 if (!flg) break; 987 988 for (i=0; i<n;) { 989 /* Now identify the consecutive vals belonging to the same row */ 990 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 991 if (j < n) ncols = j-i; 992 else ncols = n-i; 993 /* Now assemble all these values with a single function call */ 994 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 995 i = j; 996 } 997 } 998 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 999 /* Now process the block-stash. Since the values are stashed column-oriented, 1000 set the roworiented flag to column oriented, and after MatSetValues() 1001 restore the original flags */ 1002 r1 = baij->roworiented; 1003 r2 = a->roworiented; 1004 r3 = b->roworiented; 1005 baij->roworiented = PETSC_FALSE; 1006 a->roworiented = PETSC_FALSE; 1007 b->roworiented = PETSC_FALSE; 1008 while (1) { 1009 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1010 if (!flg) break; 1011 1012 for (i=0; i<n;) { 1013 /* Now identify the consecutive vals belonging to the same row */ 1014 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1015 if (j < n) ncols = j-i; 1016 else ncols = n-i; 1017 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1018 i = j; 1019 } 1020 } 1021 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1022 baij->roworiented = r1; 1023 a->roworiented = r2; 1024 b->roworiented = r3; 1025 } 1026 1027 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1028 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1029 1030 /* determine if any processor has disassembled, if so we must 1031 also 
disassemble ourselfs, in order that we may reassemble. */ 1032 /* 1033 if nonzero structure of submatrix B cannot change then we know that 1034 no processor disassembled thus we can skip this stuff 1035 */ 1036 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1037 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1038 if (mat->was_assembled && !other_disassembled) { 1039 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1040 } 1041 } 1042 1043 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1044 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1045 } 1046 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1047 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1048 1049 #if defined(PETSC_USE_BOPT_g) 1050 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1051 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1052 baij->ht_total_ct = 0; 1053 baij->ht_insert_ct = 0; 1054 } 1055 #endif 1056 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1057 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1058 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1059 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1060 } 1061 1062 if (baij->rowvalues) { 1063 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1064 baij->rowvalues = 0; 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNC__ 1070 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1071 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1074 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 1075 PetscTruth isascii,isdraw; 1076 PetscViewer sviewer; 1077 PetscViewerFormat format; 1078 1079 PetscFunctionBegin; 1080 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1081 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1082 if (isascii) { 1083 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1084 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 1085 MatInfo info; 1086 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1087 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1089 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1090 baij->bs,(int)info.memory);CHKERRQ(ierr); 1091 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1092 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1093 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1095 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1096 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1099 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 } 1103 1104 if (isdraw) { 1105 PetscDraw draw; 1106 PetscTruth isnull; 1107 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1108 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1109 } 1110 1111 if (size == 1) { 1112 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1113 } else { 1114 /* assemble the entire matrix onto first processor. 
*/ 1115 Mat A; 1116 Mat_SeqBAIJ *Aloc; 1117 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1118 MatScalar *a; 1119 1120 if (!rank) { 1121 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1122 } else { 1123 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1124 } 1125 PetscLogObjectParent(mat,A); 1126 1127 /* copy over the A part */ 1128 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1129 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1130 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1131 1132 for (i=0; i<mbs; i++) { 1133 rvals[0] = bs*(baij->rstart + i); 1134 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1135 for (j=ai[i]; j<ai[i+1]; j++) { 1136 col = (baij->cstart+aj[j])*bs; 1137 for (k=0; k<bs; k++) { 1138 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1139 col++; a += bs; 1140 } 1141 } 1142 } 1143 /* copy over the B part */ 1144 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1145 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1146 for (i=0; i<mbs; i++) { 1147 rvals[0] = bs*(baij->rstart + i); 1148 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1149 for (j=ai[i]; j<ai[i+1]; j++) { 1150 col = baij->garray[aj[j]]*bs; 1151 for (k=0; k<bs; k++) { 1152 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1153 col++; a += bs; 1154 } 1155 } 1156 } 1157 ierr = PetscFree(rvals);CHKERRQ(ierr); 1158 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1159 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1160 /* 1161 Everyone has to call to draw the matrix since the graphics waits are 1162 synchronized across all processors that share the PetscDraw object 1163 */ 1164 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1165 if (!rank) { 1166 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1167 } 1168 ierr = 
PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
/*
   MatView_MPIBAIJ - Viewer dispatch for MPIBAIJ matrices.  ASCII, draw,
   socket and binary viewers are all handled by
   MatView_MPIBAIJ_ASCIIorDraworSocket(); any other viewer type raises an
   error naming the unsupported type.
*/
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage owned by an MPIBAIJ matrix: the
   stashes, the ownership table, the two sequential blocks (A = diagonal,
   B = off-diagonal), the column map, the scatter context and any scratch
   arrays (MatGetRow workspace, block-assembly buffer, hash table).
   Conditionally-allocated members are guarded before freeing.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* rowners is one allocation also holding cowners and rowners_bs (see MatCreate_MPIBAIJ) */
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)
{ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1218 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1219 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1220 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1221 #if defined(PETSC_USE_MAT_SINGLE) 1222 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1223 #endif 1224 ierr = PetscFree(baij);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNC__ 1229 #define __FUNC__ "MatMult_MPIBAIJ" 1230 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1231 { 1232 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1233 int ierr,nt; 1234 1235 PetscFunctionBegin; 1236 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1237 if (nt != A->n) { 1238 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1239 } 1240 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1241 if (nt != A->m) { 1242 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1243 } 1244 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1245 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1246 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1247 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1248 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNC__ 1253 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1254 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1255 { 1256 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1257 int ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1261 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1262 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1263 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1264 
PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTranspose_MPIBAIJ"
/*
   MatMultTranspose_MPIBAIJ - Computes yy = A^T * xx.  The off-diagonal
   contribution B^T*xx is formed into a->lvec first and scattered back in
   REVERSE mode with ADD_VALUES, overlapped with the local transpose product.
*/
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - Computes zz = yy + A^T * xx using the same
   reverse-scatter overlap scheme as MatMultTranspose_MPIBAIJ.
*/
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/ 1305 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 /* 1310 This only works correctly for square matrices where the subblock A->A is the 1311 diagonal block 1312 */ 1313 #undef __FUNC__ 1314 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1315 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1316 { 1317 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1318 int ierr; 1319 1320 PetscFunctionBegin; 1321 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1322 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNC__ 1327 #define __FUNC__ "MatScale_MPIBAIJ" 1328 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1329 { 1330 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331 int ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1335 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNC__ 1340 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1341 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1342 { 1343 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1344 1345 PetscFunctionBegin; 1346 if (m) *m = mat->rstart_bs; 1347 if (n) *n = mat->rend_bs; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNC__ 1352 #define __FUNC__ "MatGetRow_MPIBAIJ" 1353 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1354 { 1355 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1356 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1357 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1358 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1359 int *cmap,*idx_p,cstart = mat->cstart; 1360 1361 PetscFunctionBegin; 1362 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1363 mat->getrowactive = PETSC_TRUE; 1364 1365 if (!mat->rowvalues && (idx || v)) { 1366 /* 1367 allocate enough space to 
hold information from the longest row. 1368 */ 1369 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1370 int max = 1,mbs = mat->mbs,tmp; 1371 for (i=0; i<mbs; i++) { 1372 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1373 if (max < tmp) { max = tmp; } 1374 } 1375 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr); 1376 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1377 } 1378 1379 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1380 lrow = row - brstart; 1381 1382 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1383 if (!v) {pvA = 0; pvB = 0;} 1384 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1385 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1386 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1387 nztot = nzA + nzB; 1388 1389 cmap = mat->garray; 1390 if (v || idx) { 1391 if (nztot) { 1392 /* Sort by increasing column numbers, assuming A and B already sorted */ 1393 int imark = -1; 1394 if (v) { 1395 *v = v_p = mat->rowvalues; 1396 for (i=0; i<nzB; i++) { 1397 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1398 else break; 1399 } 1400 imark = i; 1401 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1402 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1403 } 1404 if (idx) { 1405 *idx = idx_p = mat->rowindices; 1406 if (imark > -1) { 1407 for (i=0; i<imark; i++) { 1408 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1409 } 1410 } else { 1411 for (i=0; i<nzB; i++) { 1412 if (cmap[cworkB[i]/bs] < cstart) 1413 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1414 else break; 1415 } 1416 imark = i; 1417 } 1418 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1419 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1420 } 1421 } else { 1422 if (idx) *idx = 0; 1423 if (v) *v = 0; 1424 } 1425 } 1426 *nz = nztot; 1427 ierr = 
(*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1428 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNC__ 1433 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1434 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1435 { 1436 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1437 1438 PetscFunctionBegin; 1439 if (baij->getrowactive == PETSC_FALSE) { 1440 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1441 } 1442 baij->getrowactive = PETSC_FALSE; 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNC__ 1447 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1448 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1449 { 1450 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1451 1452 PetscFunctionBegin; 1453 *bs = baij->bs; 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNC__ 1458 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1459 int MatZeroEntries_MPIBAIJ(Mat A) 1460 { 1461 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1462 int ierr; 1463 1464 PetscFunctionBegin; 1465 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1466 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNC__ 1471 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1472 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1473 { 1474 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1475 Mat A = a->A,B = a->B; 1476 int ierr; 1477 PetscReal isend[5],irecv[5]; 1478 1479 PetscFunctionBegin; 1480 info->block_size = (double)a->bs; 1481 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1482 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1483 isend[3] = info->memory; isend[4] = info->mallocs; 1484 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1485 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1486 isend[3] += info->memory; isend[4] += info->mallocs; 1487 if (flag == 
MAT_LOCAL) { 1488 info->nz_used = isend[0]; 1489 info->nz_allocated = isend[1]; 1490 info->nz_unneeded = isend[2]; 1491 info->memory = isend[3]; 1492 info->mallocs = isend[4]; 1493 } else if (flag == MAT_GLOBAL_MAX) { 1494 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1495 info->nz_used = irecv[0]; 1496 info->nz_allocated = irecv[1]; 1497 info->nz_unneeded = irecv[2]; 1498 info->memory = irecv[3]; 1499 info->mallocs = irecv[4]; 1500 } else if (flag == MAT_GLOBAL_SUM) { 1501 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1502 info->nz_used = irecv[0]; 1503 info->nz_allocated = irecv[1]; 1504 info->nz_unneeded = irecv[2]; 1505 info->memory = irecv[3]; 1506 info->mallocs = irecv[4]; 1507 } else { 1508 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1509 } 1510 info->rows_global = (double)A->M; 1511 info->columns_global = (double)A->N; 1512 info->rows_local = (double)A->m; 1513 info->columns_local = (double)A->N; 1514 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1515 info->fill_ratio_needed = 0; 1516 info->factor_mallocs = 0; 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNC__ 1521 #define __FUNC__ "MatSetOption_MPIBAIJ" 1522 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1523 { 1524 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1525 int ierr; 1526 1527 PetscFunctionBegin; 1528 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1529 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1530 op == MAT_COLUMNS_UNSORTED || 1531 op == MAT_COLUMNS_SORTED || 1532 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1533 op == MAT_KEEP_ZEROED_ROWS || 1534 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1535 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1536 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1537 } else if (op == MAT_ROW_ORIENTED) { 1538 a->roworiented = PETSC_TRUE; 1539 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1540 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1541 } else if (op == MAT_ROWS_SORTED || 1542 op == 
MAT_ROWS_UNSORTED || 1543 op == MAT_YES_NEW_DIAGONALS || 1544 op == MAT_USE_HASH_TABLE) { 1545 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1546 } else if (op == MAT_COLUMN_ORIENTED) { 1547 a->roworiented = PETSC_FALSE; 1548 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1549 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1550 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1551 a->donotstash = PETSC_TRUE; 1552 } else if (op == MAT_NO_NEW_DIAGONALS) { 1553 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1554 } else if (op == MAT_USE_HASH_TABLE) { 1555 a->ht_flag = PETSC_TRUE; 1556 } else { 1557 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNC__ 1563 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1564 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1565 { 1566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1567 Mat_SeqBAIJ *Aloc; 1568 Mat B; 1569 int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1570 int bs=baij->bs,mbs=baij->mbs; 1571 MatScalar *a; 1572 1573 PetscFunctionBegin; 1574 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1575 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1576 1577 /* copy over the A part */ 1578 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1579 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1580 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1581 1582 for (i=0; i<mbs; i++) { 1583 rvals[0] = bs*(baij->rstart + i); 1584 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1585 for (j=ai[i]; j<ai[i+1]; j++) { 1586 col = (baij->cstart+aj[j])*bs; 1587 for (k=0; k<bs; k++) { 1588 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1589 col++; a += bs; 1590 } 1591 } 1592 } 1593 /* copy over the B part */ 1594 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1595 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1596 for (i=0; i<mbs; i++) { 1597 rvals[0] = bs*(baij->rstart + i); 1598 for (j=1; 
j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1599 for (j=ai[i]; j<ai[i+1]; j++) { 1600 col = baij->garray[aj[j]]*bs; 1601 for (k=0; k<bs; k++) { 1602 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1603 col++; a += bs; 1604 } 1605 } 1606 } 1607 ierr = PetscFree(rvals);CHKERRQ(ierr); 1608 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1609 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1610 1611 if (matout) { 1612 *matout = B; 1613 } else { 1614 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1615 } 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNC__ 1620 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1621 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1622 { 1623 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1624 Mat a = baij->A,b = baij->B; 1625 int ierr,s1,s2,s3; 1626 1627 PetscFunctionBegin; 1628 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1629 if (rr) { 1630 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1631 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1632 /* Overlap communication with computation. 
*/ 1633 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1634 } 1635 if (ll) { 1636 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1637 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1638 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1639 } 1640 /* scale the diagonal block */ 1641 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1642 1643 if (rr) { 1644 /* Do a scatter end and then right scale the off-diagonal block */ 1645 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1646 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1647 } 1648 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNC__ 1653 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1654 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1655 { 1656 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1657 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1658 int *procs,*nprocs,j,idx,nsends,*work,row; 1659 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1660 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1661 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1662 MPI_Comm comm = A->comm; 1663 MPI_Request *send_waits,*recv_waits; 1664 MPI_Status recv_status,*send_status; 1665 IS istmp; 1666 PetscTruth found; 1667 1668 PetscFunctionBegin; 1669 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1670 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1671 1672 /* first count number of contributors to each processor */ 1673 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1674 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1675 procs = nprocs + size; 1676 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1677 for (i=0; i<N; i++) { 1678 idx = rows[i]; 1679 found = PETSC_FALSE; 1680 for (j=0; j<size; j++) { 1681 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1682 nprocs[j]++; procs[j] 
= 1; owner[i] = j; found = PETSC_TRUE; break; 1683 } 1684 } 1685 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1686 } 1687 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1688 1689 /* inform other processors of number of messages and max length*/ 1690 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1691 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1692 nmax = work[rank]; 1693 nrecvs = work[size+rank]; 1694 ierr = PetscFree(work);CHKERRQ(ierr); 1695 1696 /* post receives: */ 1697 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1698 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1699 for (i=0; i<nrecvs; i++) { 1700 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1701 } 1702 1703 /* do sends: 1704 1) starts[i] gives the starting index in svalues for stuff going to 1705 the ith processor 1706 */ 1707 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1708 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1709 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1710 starts[0] = 0; 1711 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1712 for (i=0; i<N; i++) { 1713 svalues[starts[owner[i]]++] = rows[i]; 1714 } 1715 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1716 1717 starts[0] = 0; 1718 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1719 count = 0; 1720 for (i=0; i<size; i++) { 1721 if (procs[i]) { 1722 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1723 } 1724 } 1725 ierr = PetscFree(starts);CHKERRQ(ierr); 1726 1727 base = owners[rank]*bs; 1728 1729 /* wait on receives */ 1730 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1731 source = lens + nrecvs; 1732 count = nrecvs; slen = 0; 1733 while (count) { 1734 ierr = 
MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1735 /* unpack receives into our local space */ 1736 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1737 source[imdex] = recv_status.MPI_SOURCE; 1738 lens[imdex] = n; 1739 slen += n; 1740 count--; 1741 } 1742 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1743 1744 /* move the data into the send scatter */ 1745 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1746 count = 0; 1747 for (i=0; i<nrecvs; i++) { 1748 values = rvalues + i*nmax; 1749 for (j=0; j<lens[i]; j++) { 1750 lrows[count++] = values[j] - base; 1751 } 1752 } 1753 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1754 ierr = PetscFree(lens);CHKERRQ(ierr); 1755 ierr = PetscFree(owner);CHKERRQ(ierr); 1756 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1757 1758 /* actually zap the local rows */ 1759 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1760 PetscLogObjectParent(A,istmp); 1761 1762 /* 1763 Zero the required rows. If the "diagonal block" of the matrix 1764 is square and the user wishes to set the diagonal we use seperate 1765 code so that MatSetValues() is not called for each diagonal allocating 1766 new memory, thus calling lots of mallocs and slowing things down. 
1767 1768 Contributed by: Mathew Knepley 1769 */ 1770 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1771 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1772 if (diag && (l->A->M == l->A->N)) { 1773 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1774 } else if (diag) { 1775 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1776 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1777 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1778 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1779 } 1780 for (i=0; i<slen; i++) { 1781 row = lrows[i] + rstart_bs; 1782 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1783 } 1784 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1785 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1786 } else { 1787 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1788 } 1789 1790 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1791 ierr = PetscFree(lrows);CHKERRQ(ierr); 1792 1793 /* wait on sends */ 1794 if (nsends) { 1795 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1796 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1797 ierr = PetscFree(send_status);CHKERRQ(ierr); 1798 } 1799 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1800 ierr = PetscFree(svalues);CHKERRQ(ierr); 1801 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNC__ 1806 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1807 int MatPrintHelp_MPIBAIJ(Mat A) 1808 { 1809 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1810 MPI_Comm comm = A->comm; 1811 static int called = 0; 1812 int ierr; 1813 1814 PetscFunctionBegin; 1815 if (!a->rank) { 1816 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1817 } 1818 if (called) {PetscFunctionReturn(0);} else called = 1; 1819 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the 
defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/*
   MatSetUnfactored_MPIBAIJ - Clears the factored state; only the diagonal
   block a->A is forwarded (the off-diagonal block is never factored here).
*/
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* forward declaration; definition appears after the function table below */
static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   MatEqual_MPIBAIJ - Compares two MPIBAIJ matrices: the local diagonal and
   off-diagonal sequential blocks are compared, then the per-process results
   are combined with a logical AND across the communicator so every rank
   returns the same global answer.
*/
int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat         a,b,c,d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* only compare the off-diagonal blocks if the diagonal blocks already match */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* NOTE(review): PetscTruth is reduced as MPI_INT here — assumes the two types
     have the same size, which holds for this PETSc version */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatSetUpPreallocation_MPIBAIJ"
/*
   MatSetUpPreallocation_MPIBAIJ - Default preallocation: block size 1 with
   PETSC_DEFAULT row estimates, used when the caller never preallocated.
*/
int MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function dispatch table for MATMPIBAIJ; a 0 entry means the operation
   is not supported by this matrix type.  Order is fixed by struct _MatOps. */
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
       MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIBAIJ,
       MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
       MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       0,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
       MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIBAIJ,
       0,
       MatGetOwnershipRange_MPIBAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       0,
       MatPrintHelp_MPIBAIJ,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetUnfactored_MPIBAIJ,
       0,
       MatSetValuesBlocked_MPIBAIJ,
       0,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       MatGetMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetRowMax_MPIBAIJ};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/*
   MatGetDiagonalBlock_MPIBAIJ - Returns the local diagonal block (a->A)
   directly, without copying; *iscopy is set accordingly.  The reuse
   argument is ignored since no new matrix is ever created.
*/
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Type constructor registered for MATMPIBAIJ; fills in
   the operations table and initializes the Mat_MPIBAIJ data structure.
   Sizes/preallocation are set later by MatMPIBAIJSetPreallocation().
*/
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  int         ierr;
  PetscTruth  flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping   = 0;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr =
MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners, cowners and
     rowners_bs share one allocation of 3*(size+2) ints */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->lensetvalues  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* optionally enable hash-table based assembly; a factor <= 1.0 on the
     command line is replaced by the default 1.39 */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register type-specific operations queried via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs    - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:


   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIBAIJ *b;
  int         ierr,i;
  PetscTruth  flg2;

  PetscFunctionBegin;
  /* silently ignore matrices of other types so this may be called generically */
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);

  B->preallocated = PETSC_TRUE;
  /* the command line option overrides the bs argument */
  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* validate block size and nonzero estimates before any allocation */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* resolve local/global sizes (block-aligned) and build the row/column maps */
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = B->m/bs;
  b->nbs = B->n/bs;
  b->Mbs = B->M/bs;
  b->Nbs = B->N/bs;

  /* gather block-row counts from all ranks and prefix-sum into ownership ranges */
  ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];

  /* same for the block-column ownership */
  ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* scaled (point, not block) ownership ranges */
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  /* the diagonal block is m x n; the off-diagonal block is allocated with the
     full global column width N and compacted later during assembly */
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.
n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a single process fall back to the (cheaper) sequential BAIJ format */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates a parallel BAIJ matrix, copying the
   layout/ownership information and duplicating the two sequential blocks;
   values are copied or not according to cpvalues.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  a = (Mat_MPIBAIJ*)mat->data;

  /* copy block sizes and ownership information from the original */
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-use scratch state starts out empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
a->cstart_bs = oldmat->cstart_bs; 2348 a->cend_bs = oldmat->cend_bs; 2349 2350 /* hash table stuff */ 2351 a->ht = 0; 2352 a->hd = 0; 2353 a->ht_size = 0; 2354 a->ht_flag = oldmat->ht_flag; 2355 a->ht_fact = oldmat->ht_fact; 2356 a->ht_total_ct = 0; 2357 a->ht_insert_ct = 0; 2358 2359 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2360 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2361 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2362 if (oldmat->colmap) { 2363 #if defined (PETSC_USE_CTABLE) 2364 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2365 #else 2366 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2367 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2368 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2369 #endif 2370 } else a->colmap = 0; 2371 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2372 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2373 PetscLogObjectMemory(mat,len*sizeof(int)); 2374 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2375 } else a->garray = 0; 2376 2377 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2378 PetscLogObjectParent(mat,a->lvec); 2379 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2380 2381 PetscLogObjectParent(mat,a->Mvctx); 2382 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2383 PetscLogObjectParent(mat,a->A); 2384 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2385 PetscLogObjectParent(mat,a->B); 2386 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2387 *newmat = mat; 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #include "petscsys.h" 2392 2393 EXTERN_C_BEGIN 2394 #undef __FUNC__ 2395 #define __FUNC__ "MatLoad_MPIBAIJ" 2396 int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2397 { 2398 Mat A; 
2399 int i,nz,ierr,j,rstart,rend,fd; 2400 Scalar *vals,*buf; 2401 MPI_Comm comm = ((PetscObject)viewer)->comm; 2402 MPI_Status status; 2403 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2404 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2405 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2406 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2407 int dcount,kmax,k,nzcount,tmp; 2408 2409 PetscFunctionBegin; 2410 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2411 2412 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2413 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2414 if (!rank) { 2415 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2416 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2417 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2418 if (header[3] < 0) { 2419 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2420 } 2421 } 2422 2423 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2424 M = header[1]; N = header[2]; 2425 2426 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2427 2428 /* 2429 This code adds extra rows to make sure the number of rows is 2430 divisible by the blocksize 2431 */ 2432 Mbs = M/bs; 2433 extra_rows = bs - M + bs*(Mbs); 2434 if (extra_rows == bs) extra_rows = 0; 2435 else Mbs++; 2436 if (extra_rows &&!rank) { 2437 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2438 } 2439 2440 /* determine ownership of all rows */ 2441 mbs = Mbs/size + ((Mbs % size) > rank); 2442 m = mbs*bs; 2443 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2444 browners = rowners + size + 1; 2445 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2446 rowners[0] = 0; 2447 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2448 
for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2449 rstart = rowners[rank]; 2450 rend = rowners[rank+1]; 2451 2452 /* distribute row lengths to all processors */ 2453 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2454 if (!rank) { 2455 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2456 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2457 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2458 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2459 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2460 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2461 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2462 } else { 2463 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2464 } 2465 2466 if (!rank) { 2467 /* calculate the number of nonzeros on each processor */ 2468 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2469 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2470 for (i=0; i<size; i++) { 2471 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2472 procsnz[i] += rowlengths[j]; 2473 } 2474 } 2475 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2476 2477 /* determine max buffer needed and allocate it */ 2478 maxnz = 0; 2479 for (i=0; i<size; i++) { 2480 maxnz = PetscMax(maxnz,procsnz[i]); 2481 } 2482 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2483 2484 /* read in my part of the matrix column indices */ 2485 nz = procsnz[0]; 2486 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2487 mycols = ibuf; 2488 if (size == 1) nz -= extra_rows; 2489 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2490 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2491 2492 /* read in every ones (except the last) and ship off */ 2493 for (i=1; i<size-1; i++) { 2494 nz = procsnz[i]; 2495 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2496 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2497 } 2498 /* read in the stuff for the last proc */ 2499 if (size != 1) { 2500 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2501 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2502 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2503 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2504 } 2505 ierr = PetscFree(cols);CHKERRQ(ierr); 2506 } else { 2507 /* determine buffer space needed for message */ 2508 nz = 0; 2509 for (i=0; i<m; i++) { 2510 nz += locrowlens[i]; 2511 } 2512 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2513 mycols = ibuf; 2514 /* receive message of column indices*/ 2515 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2516 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2517 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2518 } 2519 2520 /* loop over local rows, determining number of off diagonal entries */ 2521 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2522 odlens = dlens + (rend-rstart); 2523 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2524 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2525 masked1 = mask + Mbs; 2526 masked2 = masked1 + Mbs; 2527 rowcount = 0; nzcount = 0; 2528 for (i=0; i<mbs; i++) { 2529 dcount = 0; 2530 odcount = 0; 2531 for (j=0; j<bs; j++) { 2532 kmax = locrowlens[rowcount]; 2533 for (k=0; k<kmax; k++) { 2534 tmp = mycols[nzcount++]/bs; 2535 if (!mask[tmp]) { 2536 mask[tmp] = 1; 2537 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2538 else masked1[dcount++] = tmp; 2539 } 2540 } 2541 rowcount++; 2542 } 2543 2544 dlens[i] = dcount; 2545 odlens[i] = odcount; 2546 2547 /* zero out the mask elements we set */ 2548 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2549 for (j=0; j<odcount; j++) 
mask[masked2[j]] = 0; 2550 } 2551 2552 /* create our matrix */ 2553 ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2554 A = *newmat; 2555 MatSetOption(A,MAT_COLUMNS_SORTED); 2556 2557 if (!rank) { 2558 ierr = PetscMalloc(maxnz*sizeof(Scalar),&buf);CHKERRQ(ierr); 2559 /* read in my part of the matrix numerical values */ 2560 nz = procsnz[0]; 2561 vals = buf; 2562 mycols = ibuf; 2563 if (size == 1) nz -= extra_rows; 2564 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2565 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2566 2567 /* insert into matrix */ 2568 jj = rstart*bs; 2569 for (i=0; i<m; i++) { 2570 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2571 mycols += locrowlens[i]; 2572 vals += locrowlens[i]; 2573 jj++; 2574 } 2575 /* read in other processors (except the last one) and ship out */ 2576 for (i=1; i<size-1; i++) { 2577 nz = procsnz[i]; 2578 vals = buf; 2579 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2580 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2581 } 2582 /* the last proc */ 2583 if (size != 1){ 2584 nz = procsnz[i] - extra_rows; 2585 vals = buf; 2586 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2587 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2588 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2589 } 2590 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2591 } else { 2592 /* receive numeric values */ 2593 ierr = PetscMalloc(nz*sizeof(Scalar),&buf);CHKERRQ(ierr); 2594 2595 /* receive message of values*/ 2596 vals = buf; 2597 mycols = ibuf; 2598 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2599 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2600 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2601 2602 /* insert into matrix */ 2603 jj = rstart*bs; 2604 for 
 (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  /* release all load-time work buffers */
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Collective on Mat

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;
  int         ierr;
  PetscTruth  flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  /* hash-table assembly only exists for the parallel BAIJ format */
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMPIBAIJGetSeqBAIJ"
/*
   MatMPIBAIJGetSeqBAIJ - Gives access (no copies) to the internal diagonal
   (Ad) and off-diagonal (Ao) sequential blocks, plus the garray map from
   Ao's compacted local block columns to global block columns.
*/
int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}