/*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "src/vec/vecimpl.h"

EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN int DisAssemble_MPIBAIJ(Mat);
EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqBAIJ(Mat);
EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);

/*  UGLY, ugly, ugly
    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar()
    does not exist (it is #defined to the plain name below).  Otherwise the
    ..._MatScalar() routines take matrix elements in single precision and insert
    them into the single-precision data structure.  The function
    MatSetValuesBlocked_MPIBAIJ() converts the entries into single precision and
    then calls ..._MatScalar() to put them into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,i;
  PetscScalar *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
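/*
   A minimal illustrative sketch (kept out of the build; the helper name is
   hypothetical) of how the colmap built above is consumed elsewhere in this
   file: map a global block column gbcol to the scalar offset of that block
   column in the off-diagonal (B) part, or -1 if it is absent on this process.
   The +1/-1 shifting lets a stored 0 mean "not present".
*/
#if 0
static int ColmapLookupSketch(Mat_MPIBAIJ *baij,int gbcol,int *lcol)
{
  int ierr;
#if defined (PETSC_USE_CTABLE)
  ierr  = PetscTableFind(baij->colmap,gbcol+1,lcol);CHKERRQ(ierr);
  *lcol = *lcol - 1;               /* 0 (not found) becomes -1 */
#else
  *lcol = baij->colmap[gbcol] - 1; /* 0 (not present) becomes -1 */
#endif
  return 0;
}
#endif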
#define CHUNKSIZE  10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      /* note: zero with sizeof(MatScalar), matching the allocation above */ \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}
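/*
   The two macros above implement one fast path: locate (or create) the block
   (brow,bcol) in a block-CSR row and update one scalar inside it.  The sketch
   below (kept out of the build; plain CSR with bs = 1 and a hypothetical
   helper name) shows the same control flow with the reallocation logic
   stripped out: bisection to narrow the search, a short linear scan, then a
   shift-and-insert when the column is new.  It assumes the row already has
   room for one more entry, which the macros guarantee by enlarging first.
*/
#if 0
static void InsertSketch(int *rp,PetscScalar *ap,int *nrow,int bcol,
                         PetscScalar value,InsertMode addv)
{
  int low = 0,high = *nrow,t,i,ii;
  while (high - low > 3) {                /* bisection, as in the macros */
    t = (low + high)/2;
    if (rp[t] > bcol) high = t; else low = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > bcol) break;
    if (rp[i] == bcol) {                  /* entry exists: update in place */
      if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {         /* shift later entries up by one */
    rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii];
  }
  rp[i] = bcol; ap[i] = value; (*nrow)++; /* insert the new entry */
}
#endif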
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
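/*
   The routines below accept values either row-oriented or column-oriented
   (baij->roworiented).  A small worked example (comment only): for m = 2 rows
   and n = 3 columns, the entry for (im[i],in[j]) is read as

       roworiented:  value = v[i*n + j]   (v laid out row by row)
       otherwise:    value = v[i + j*m]   (v laid out column by column)

   so v = {1,2,3,4,5,6} is the matrix [1 2 3; 4 5 6] in the first case and
   [1 3 5; 2 4 6] in the second.
*/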
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
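/*
   HASH() is a multiplicative hash: it scales the key by the fractional part
   of the golden ratio (HASH_KEY) and keeps size times the fractional part of
   the product.  A worked example (comment only): for key = 12345 and
   size = 1000, tmp = 12345*0.6180339887 = 7629.6296..., so HASH() returns
   (int)(1000*0.6296...) = 629.  Collisions are resolved by the linear-probing
   loops in the routines below and in MatCreateHashTable_MPIBAIJ_Private().
*/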
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
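/*
   The quantity computed above is the Frobenius norm,

       ||M||_F = sqrt( sum_{i,j} |m_ij|^2 ),

   with the sum split over the diagonal (A) and off-diagonal (B) local blocks
   and then reduced across processes with a single MPI_Allreduce; the other
   norm types are simply not implemented here yet.
*/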
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD = baij->hd;
  HT = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_DSCPACK)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_DSCPACK)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
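/*
   A minimal usage sketch (kept out of the build; identifiers are illustrative)
   of the assembly protocol the two routines above implement: every process
   contributes values, possibly for rows it does not own (those are stashed
   and scattered here), and then all processes call the Begin/End pair
   collectively.
*/
#if 0
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(mat,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif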
extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  /* the sizes must come from the parallel matrix, not the local diagonal block A */
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
*/ 1645 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1646 } 1647 if (ll) { 1648 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1649 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1650 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1651 } 1652 /* scale the diagonal block */ 1653 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1654 1655 if (rr) { 1656 /* Do a scatter end and then right scale the off-diagonal block */ 1657 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1658 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1659 } 1660 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1666 int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag) 1667 { 1668 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1669 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1670 int *nprocs,j,idx,nsends,row; 1671 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1672 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1673 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1674 MPI_Comm comm = A->comm; 1675 MPI_Request *send_waits,*recv_waits; 1676 MPI_Status recv_status,*send_status; 1677 IS istmp; 1678 PetscTruth found; 1679 1680 PetscFunctionBegin; 1681 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1682 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1683 1684 /* first count number of contributors to each processor */ 1685 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1686 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1687 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1688 for (i=0; i<N; i++) { 1689 idx = rows[i]; 1690 found = PETSC_FALSE; 1691 for (j=0; j<size; j++) { 1692 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1693 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1694 } 1695 } 1696 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1697 } 1698 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1699 1700 /* inform other processors of number of messages and max length*/ 1701 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1702 1703 /* post receives: */ 1704 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1705 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1706 for (i=0; i<nrecvs; i++) { 1707 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1708 } 1709 1710 /* do sends: 1711 1) starts[i] gives the starting index in svalues for stuff going to 1712 the ith processor 1713 */ 1714 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1715 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1716 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1717 starts[0] = 0; 1718 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1719 for (i=0; i<N; i++) { 1720 svalues[starts[owner[i]]++] = rows[i]; 1721 } 1722 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1723 1724 starts[0] = 0; 1725 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1726 count = 0; 1727 for (i=0; i<size; i++) { 1728 if (nprocs[2*i+1]) { 1729 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1730 } 1731 } 1732 ierr = 
PetscFree(starts);CHKERRQ(ierr); 1733 1734 base = owners[rank]*bs; 1735 1736 /* wait on receives */ 1737 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1738 source = lens + nrecvs; 1739 count = nrecvs; slen = 0; 1740 while (count) { 1741 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1742 /* unpack receives into our local space */ 1743 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1744 source[imdex] = recv_status.MPI_SOURCE; 1745 lens[imdex] = n; 1746 slen += n; 1747 count--; 1748 } 1749 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1750 1751 /* move the data into the send scatter */ 1752 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1753 count = 0; 1754 for (i=0; i<nrecvs; i++) { 1755 values = rvalues + i*nmax; 1756 for (j=0; j<lens[i]; j++) { 1757 lrows[count++] = values[j] - base; 1758 } 1759 } 1760 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1761 ierr = PetscFree(lens);CHKERRQ(ierr); 1762 ierr = PetscFree(owner);CHKERRQ(ierr); 1763 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1764 1765 /* actually zap the local rows */ 1766 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1767 PetscLogObjectParent(A,istmp); 1768 1769 /* 1770 Zero the required rows. If the "diagonal block" of the matrix 1771 is square and the user wishes to set the diagonal we use seperate 1772 code so that MatSetValues() is not called for each diagonal allocating 1773 new memory, thus calling lots of mallocs and slowing things down. 1774 1775 Contributed by: Mathew Knepley 1776 */ 1777 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1778 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1779 if (diag && (l->A->M == l->A->N)) { 1780 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1781 } else if (diag) { 1782 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1783 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1784 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1785 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1786 } 1787 for (i=0; i<slen; i++) { 1788 row = lrows[i] + rstart_bs; 1789 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1790 } 1791 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1792 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1793 } else { 1794 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1795 } 1796 1797 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1798 ierr = PetscFree(lrows);CHKERRQ(ierr); 1799 1800 /* wait on sends */ 1801 if (nsends) { 1802 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1803 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1804 ierr = PetscFree(send_status);CHKERRQ(ierr); 1805 } 1806 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1807 ierr = PetscFree(svalues);CHKERRQ(ierr); 1808 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1814 int MatPrintHelp_MPIBAIJ(Mat A) 1815 { 1816 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1817 MPI_Comm comm = A->comm; 1818 static int called = 0; 1819 int ierr; 1820 1821 PetscFunctionBegin; 1822 if (!a->rank) { 1823 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1824 } 1825 if (called) {PetscFunctionReturn(0);} else called = 1; 1826 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1827 ierr = 
(*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1833 int MatSetUnfactored_MPIBAIJ(Mat A) 1834 { 1835 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1836 int ierr; 1837 1838 PetscFunctionBegin; 1839 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatEqual_MPIBAIJ" 1847 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1848 { 1849 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1850 Mat a,b,c,d; 1851 PetscTruth flg; 1852 int ierr; 1853 1854 PetscFunctionBegin; 1855 a = matA->A; b = matA->B; 1856 c = matB->A; d = matB->B; 1857 1858 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1859 if (flg == PETSC_TRUE) { 1860 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1861 } 1862 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1869 int MatSetUpPreallocation_MPIBAIJ(Mat A) 1870 { 1871 int ierr; 1872 1873 PetscFunctionBegin; 1874 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 /* -------------------------------------------------------------------*/ 1879 static struct _MatOps MatOps_Values = { 1880 MatSetValues_MPIBAIJ, 1881 MatGetRow_MPIBAIJ, 1882 MatRestoreRow_MPIBAIJ, 1883 MatMult_MPIBAIJ, 1884 MatMultAdd_MPIBAIJ, 1885 MatMultTranspose_MPIBAIJ, 1886 MatMultTransposeAdd_MPIBAIJ, 1887 0, 1888 0, 1889 0, 1890 0, 1891 0, 1892 0, 1893 0, 1894 MatTranspose_MPIBAIJ, 1895 MatGetInfo_MPIBAIJ, 1896 MatEqual_MPIBAIJ, 1897 MatGetDiagonal_MPIBAIJ, 1898 MatDiagonalScale_MPIBAIJ, 1899 MatNorm_MPIBAIJ, 1900 MatAssemblyBegin_MPIBAIJ, 1901 MatAssemblyEnd_MPIBAIJ, 1902 0, 1903 MatSetOption_MPIBAIJ, 1904 MatZeroEntries_MPIBAIJ, 1905 MatZeroRows_MPIBAIJ, 1906 0, 1907 0, 1908 0, 1909 0, 1910 MatSetUpPreallocation_MPIBAIJ, 1911 0, 1912 0, 1913 0, 1914 0, 1915 MatDuplicate_MPIBAIJ, 1916 0, 1917 0, 1918 0, 1919 0, 1920 0, 1921 MatGetSubMatrices_MPIBAIJ, 1922 MatIncreaseOverlap_MPIBAIJ, 1923 MatGetValues_MPIBAIJ, 1924 0, 1925 MatPrintHelp_MPIBAIJ, 1926 MatScale_MPIBAIJ, 1927 0, 1928 0, 1929 0, 1930 MatGetBlockSize_MPIBAIJ, 1931 0, 1932 0, 1933 0, 1934 0, 1935 0, 1936 0, 1937 MatSetUnfactored_MPIBAIJ, 1938 0, 1939 MatSetValuesBlocked_MPIBAIJ, 1940 0, 1941 MatDestroy_MPIBAIJ, 1942 MatView_MPIBAIJ, 1943 MatGetPetscMaps_Petsc, 1944 0, 1945 0, 1946 0, 1947 0, 1948 0, 1949 0, 1950 MatGetRowMax_MPIBAIJ}; 1951 1952 1953 EXTERN_C_BEGIN 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1956 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1957 { 1958 PetscFunctionBegin; 1959 *a = ((Mat_MPIBAIJ *)A->data)->A; 1960 *iscopy = PETSC_FALSE; 1961 PetscFunctionReturn(0); 1962 } 1963 EXTERN_C_END 1964 1965 EXTERN_C_BEGIN 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 1968 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1969 { 1970 Mat_MPIBAIJ *b; 1971 int ierr,i; 1972 1973 PetscFunctionBegin; 1974 B->preallocated = PETSC_TRUE; 1975 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1976 1977 if (bs < 1) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1978 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1979 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1980 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1981 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1982 if (d_nnz) { 1983 for (i=0; i<B->m/bs; i++) { 1984 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1985 } 1986 } 1987 if (o_nnz) { 1988 for (i=0; i<B->m/bs; i++) { 1989 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1990 } 1991 } 1992 1993 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1994 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1995 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1996 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1997 1998 b = (Mat_MPIBAIJ*)B->data; 1999 b->bs = bs; 2000 b->bs2 = bs*bs; 2001 b->mbs = B->m/bs; 2002 b->nbs = B->n/bs; 2003 b->Mbs = B->M/bs; 2004 b->Nbs = B->N/bs; 2005 2006 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2007 b->rowners[0] = 0; 2008 for (i=2; i<=b->size; i++) { 2009 b->rowners[i] += b->rowners[i-1]; 2010 } 2011 b->rstart = b->rowners[b->rank]; 2012 b->rend = b->rowners[b->rank+1]; 2013 2014 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2015 b->cowners[0] = 0; 2016 for (i=2; i<=b->size; i++) { 2017 b->cowners[i] += b->cowners[i-1]; 2018 } 2019 b->cstart = b->cowners[b->rank]; 2020 b->cend = b->cowners[b->rank+1]; 2021 2022 for (i=0; i<=b->size; i++) { 2023 b->rowners_bs[i] = b->rowners[i]*bs; 2024 } 2025 b->rstart_bs = b->rstart*bs; 2026 b->rend_bs = b->rend*bs; 2027 b->cstart_bs = b->cstart*bs; 2028 b->cend_bs = b->cend*bs; 2029 2030 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2031 PetscLogObjectParent(B,b->A); 2032 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2033 PetscLogObjectParent(B,b->B); 2034 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2035 2036 PetscFunctionReturn(0); 2037 } 2038 EXTERN_C_END 2039 2040 EXTERN_C_BEGIN 2041 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2042 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2043 EXTERN_C_END 2044 2045 EXTERN_C_BEGIN 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatCreate_MPIBAIJ" 2048 int MatCreate_MPIBAIJ(Mat B) 2049 { 2050 Mat_MPIBAIJ *b; 2051 int ierr; 2052 PetscTruth flg; 2053 2054 PetscFunctionBegin; 2055 2056 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2057 B->data = (void*)b; 2058 2059 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2060 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2061 B->mapping = 0; 2062 B->factor = 0; 2063 B->assembled = PETSC_FALSE; 2064 2065 B->insertmode = NOT_SET_VALUES; 2066 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2067 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2068 2069 /* build local table of row and column ownerships */ 2070 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2071 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2072 b->cowners = b->rowners + b->size + 2; 2073 
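  /* cowners and (below) rowners_bs are carved out of the single rowners
     allocation above; each table needs size+1 entries, with rowners_bs
     holding the row ownership ranges scaled by the block size */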
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreate_MPIBAIJ:Hash table factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                           "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                           MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIBAIJ",
                                           MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                           "MatSetHashTableFactor_MPIBAIJ",
                                           MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in
   block AIJ format (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block
           rows in the diagonal portion of the local submatrix (possibly
           different for each block row) or PETSC_NULL. You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of
          local submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of
           the off-diagonal portion of the local submatrix (possibly different
           for each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  int ierr,(*f)(Mat,int,int,int*,int,int*);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block
           rows in the diagonal portion of the local submatrix (possibly
           different for each block row) or PETSC_NULL. You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of
          local submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block
           rows of the off-diagonal portion of the local submatrix (possibly
           different for each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.
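   As an illustration (a minimal creation sketch only; the global size of 12,
   bs=1, and the preallocation counts below are chosen to match the figure
   above and are not a recommendation):

.vb
   Mat A;
   /* four processes, each owning 3 rows; d_nz=3 and o_nz=9 bound the
      nonzero blocks per row in the d and o parts depicted above */
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,1,3,3,12,12,
                           3,PETSC_NULL,9,PETSC_NULL,&A);CHKERRQ(ierr);
.ve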
   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  a = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  PetscScalar  *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;
#if defined(PETSC_HAVE_DSCPACK)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs  = Mbs/size + ((Mbs % size) > rank);
  m    = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
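    /* scatter each process its per-(point)row lengths; sndcounts and the
       browners displacements are in point rows, i.e. block rows times bs */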
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the parts for all the other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens = dlens + (rend-rstart);
  ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++)  mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DSCPACK)
  ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  int ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
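
/*
   Example (a usage sketch only, not part of the interface above): once a
   MATMPIBAIJ matrix C has been assembled, its local pieces can be examined
   directly via

     Mat Ad,Ao;
     int *garray,ierr;
     ierr = MatMPIBAIJGetSeqBAIJ(C,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad and Ao are the diagonal and off-diagonal MATSEQBAIJ blocks, and
   garray[j] gives the global block column corresponding to local block
   column j of Ao.
*/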