/*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "src/vec/vecimpl.h"

EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN int DisAssemble_MPIBAIJ(Mat);
EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar[]);
EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
EXTERN int MatPrintHelp_SeqBAIJ(Mat);
EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar (PETSC_USE_MAT_SINGLE not defined) the
   ..._MatScalar() routines do not exist as separate functions; they are
   simply #defined to the regular routines below.  Otherwise a
   ..._MatScalar() routine takes matrix elements already in single
   precision and inserts them into the single precision data structure,
   while the corresponding MatSetValuesXXX_MPIBAIJ() wrapper converts the
   entries to single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,i;
  PetscScalar *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
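/*
   Usage sketch (illustrative only, not part of this source): the pair
   MatStoreValues()/MatRetrieveValues() supports refilling a matrix that
   keeps the same nonzero pattern, e.g. across Newton steps.  The pattern
   must be frozen before storing:

      ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);      ... snapshot the values
        ... modify entries, e.g. shift the diagonal, factor, ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   ... restore the snapshot
*/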
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
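/*
   Layout sketch (illustrative): colmap is indexed by global block column
   and stores 1 + bs*(local block column in B), with 0 meaning "not in the
   off-diagonal part"; the shift of 1 lets callers test for absence before
   subtracting it.  For example, with bs=2 and garray = {3,7}, colmap[3]=1
   and colmap[7]=3, so a lookup of global point column 15 gives
   colmap[15/2] - 1 + 15%2 = 2 + 1 = 3, the local point column in B.
*/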
#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE),ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}
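/*
   Storage sketch (illustrative): each bs x bs block is stored contiguously
   in column-major order, so entry (ridx,cidx) of block _i of a row lives at
   ap[bs2*_i + bs*cidx + ridx].  With bs=2 (bs2=4), the (1,0) entry of the
   third block in a row (_i=2) is ap[4*2 + 2*0 + 1] = ap[9].  The macros
   above locate the block column with a binary search narrowed to a window
   of at most 3, finish with a linear scan, and only fall into the
   CHUNKSIZE reallocation path when the row is full.
*/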
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
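/*
   Design note (illustrative): the wrappers above reuse one grow-only
   scratch buffer (setvaluescopy/setvalueslen) instead of allocating per
   call, so repeated insertions of similar size cost a single malloc.  A
   minimal sketch of the pattern, with hypothetical names buf/cap:

      if (needed > cap) {
        if (buf) { ierr = PetscFree(buf);CHKERRQ(ierr); }
        ierr = PetscMalloc(needed*sizeof(MatScalar),&buf);CHKERRQ(ierr);
        cap  = needed;
      }
*/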
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
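/*
   Indexing sketch (illustrative): on a process owning point rows
   [rstart_bs,rend_bs) = [4,8) with bs=2 and the same column range, a call
   with global entry (5,6) lands in the diagonal block A at local
   (row,col) = (1,2); a column outside [4,8) is translated through colmap
   (see CreateColmap_MPIBAIJ_Private() above) into a local column of B.
   Rows not owned locally are queued in mat->stash and delivered during
   MatAssemblyBegin()/MatAssemblyEnd().
*/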
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If only one block column (n == 1 row-oriented, or m == 1
           column-oriented) the input block is already contiguous, so a
           copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
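/*
   Copy sketch (illustrative): for row-oriented input, v is an m*bs by n*bs
   dense array, so consecutive rows of one bs x bs block are separated by
   stepval = (n-1)*bs scalars.  E.g. m=1, n=2, bs=2: v has 2 rows of 4
   entries; block j=0 consists of v[0],v[1],v[4],v[5], and the inner loops
   above gather it into the contiguous bs2-sized buffer barray that
   MatSetValuesBlocked_SeqBAIJ_MatScalar() expects.
*/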
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
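/*
   Hashing sketch (illustrative): HASH() is multiplicative hashing with the
   golden-ratio constant 0.6180339887: the key is multiplied by the
   constant, only the fractional part is kept, and scaling by the table
   size gives a slot in [0,size).  E.g. size=11, key=37:
   37*0.6180339887 = 22.867..., frac = 0.867..., slot = (int)(11*0.867) = 9.
   Keys are offset by 1 so that 0 can mean an empty slot, and collisions
   are resolved by the wrap-around linear probe in the function above.
*/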
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col;
  int             rstart=baij->rstart;
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
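/*
   Norm sketch (illustrative): for NORM_FROBENIUS each process sums
   |a_ij|^2 over its diagonal (A) and off-diagonal (B) blocks, and an
   MPI_Allreduce with MPI_SUM yields ||A||_F = sqrt(sum_ij |a_ij|^2) on
   every process; this is correct because every stored entry of the
   parallel matrix lives in exactly one of the two local blocks.
*/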
PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 938 #endif 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945 { 946 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947 int ierr,nstash,reallocs; 948 InsertMode addv; 949 950 PetscFunctionBegin; 951 if (baij->donotstash) { 952 PetscFunctionReturn(0); 953 } 954 955 /* make sure all processors are either in INSERTMODE or ADDMODE */ 956 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 957 if (addv == (ADD_VALUES|INSERT_VALUES)) { 958 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 959 } 960 mat->insertmode = addv; /* in case this processor had no cache */ 961 962 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 963 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 964 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 965 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 966 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 967 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 973 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 974 { 975 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 976 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 977 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 978 int *row,*col,other_disassembled; 979 PetscTruth r1,r2,r3; 980 MatScalar *val; 981 InsertMode addv = mat->insertmode; 982 983 PetscFunctionBegin; 984 if (!baij->donotstash) { 985 while (1) { 986 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 987 if (!flg) break; 988 989 for (i=0; i<n;) { 990 /* Now identify the consecutive vals belonging to the same row */ 991 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 992 if (j < n) ncols = j-i; 993 else ncols = n-i; 994 /* Now assemble all these values with a single function call */ 995 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 996 i = j; 997 } 998 } 999 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1000 /* Now process the block-stash. 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed
       column-oriented, set the roworiented flags to column-oriented, and
       after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
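/*
   Assembly sketch (illustrative): a typical fill cycle from user code is

      ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin() above ships the stashed off-process rows;
   MatAssemblyEnd() drains both stashes (point and block), assembles the
   two sequential blocks, and, when MAT_USE_HASH_TABLE is set, builds the
   table and switches mat->ops->setvalues to the _HT variants.
*/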
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd)     {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
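/*
   Mult sketch (illustrative): with the PETSc splitting
   y = A_diag * x_local + B_offdiag * x_ghost, MatMult overlaps
   communication with computation: start the scatter of ghost values into
   a->lvec, multiply by the diagonal block, finish the scatter, then apply
   the off-diagonal block with a multadd.  VecScatterPostRecvs() pre-posts
   the receives for the next multiply.
*/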
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this          */
  /* implementation but may not always be true                           */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this          */
  /* implementation but may not always be true                           */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
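/*
   Transpose sketch (illustrative): y = A^T x = A_diag^T x + B_offdiag^T x,
   where the B_offdiag^T part produces contributions to rows owned by
   other processes; these are accumulated in a->lvec and shipped back with
   a reverse scatter in ADD_VALUES mode, the opposite data motion of
   MatMult above.
*/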
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
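/*
   Merge sketch (illustrative): the row comes back in global column order
   by splicing three already-sorted pieces: the B (off-diagonal) columns
   that map below cstart, then the A (diagonal) columns shifted by
   cstart*bs, then the remaining B columns; cmap = garray translates B's
   compressed block columns back to global block columns.
*/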
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
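/*
   Option sketch (illustrative): the hash-table insertion path is enabled
   with

      ierr = MatSetOption(mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);

   After the first final assembly MatAssemblyEnd() above swaps the
   setvalues/setvaluesblocked ops for the _HT versions, which can only
   overwrite existing nonzero blocks, never create new ones.
*/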
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
(Mat_MPIBAIJ*)mat->data; 1633 Mat a = baij->A,b = baij->B; 1634 int ierr,s1,s2,s3; 1635 1636 PetscFunctionBegin; 1637 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1638 if (rr) { 1639 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1640 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1641 /* Overlap communication with computation. */ 1642 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1643 } 1644 if (ll) { 1645 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1646 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1647 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1648 } 1649 /* scale the diagonal block */ 1650 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1651 1652 if (rr) { 1653 /* Do a scatter end and then right scale the off-diagonal block */ 1654 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1655 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1656 } 1657 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1663 int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1664 { 1665 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1666 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1667 int *nprocs,j,idx,nsends,row; 1668 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1669 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1670 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1671 MPI_Comm comm = A->comm; 1672 MPI_Request *send_waits,*recv_waits; 1673 MPI_Status recv_status,*send_status; 1674 IS istmp; 1675 PetscTruth found; 1676 1677 PetscFunctionBegin; 1678 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1679 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1680 1681 /* first count number of contributors to each processor */ 1682 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1683 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1684 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1685 for (i=0; i<N; i++) { 1686 idx = rows[i]; 1687 found = PETSC_FALSE; 1688 for (j=0; j<size; j++) { 1689 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1690 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1691 } 1692 } 1693 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1694 } 1695 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1696 1697 /* inform other processors of number of messages and max length*/ 1698 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1699 1700 /* post receives: */ 1701 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1702 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1703 for (i=0; i<nrecvs; i++) { 1704 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1705 } 1706 1707 /* do sends: 1708 1) starts[i] gives the starting index in svalues for stuff going to 1709 the ith processor 1710 */ 1711 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1712 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1713 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1714 starts[0] = 0; 1715 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1716 for (i=0; i<N; i++) { 1717 svalues[starts[owner[i]]++] = rows[i]; 1718 } 
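/* the filling loop above advanced each starts[] entry past the data it packed,
   so the offsets are rebuilt below before the sends are posted */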
1719 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1720 1721 starts[0] = 0; 1722 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1723 count = 0; 1724 for (i=0; i<size; i++) { 1725 if (nprocs[2*i+1]) { 1726 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1727 } 1728 } 1729 ierr = PetscFree(starts);CHKERRQ(ierr); 1730 1731 base = owners[rank]*bs; 1732 1733 /* wait on receives */ 1734 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1735 source = lens + nrecvs; 1736 count = nrecvs; slen = 0; 1737 while (count) { 1738 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1739 /* unpack receives into our local space */ 1740 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1741 source[imdex] = recv_status.MPI_SOURCE; 1742 lens[imdex] = n; 1743 slen += n; 1744 count--; 1745 } 1746 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1747 1748 /* move the data into the send scatter */ 1749 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1750 count = 0; 1751 for (i=0; i<nrecvs; i++) { 1752 values = rvalues + i*nmax; 1753 for (j=0; j<lens[i]; j++) { 1754 lrows[count++] = values[j] - base; 1755 } 1756 } 1757 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1758 ierr = PetscFree(lens);CHKERRQ(ierr); 1759 ierr = PetscFree(owner);CHKERRQ(ierr); 1760 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1761 1762 /* actually zap the local rows */ 1763 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1764 PetscLogObjectParent(A,istmp); 1765 1766 /* 1767 Zero the required rows. If the "diagonal block" of the matrix 1768 is square and the user wishes to set the diagonal we use separate 1769 code so that MatSetValues() is not called for each diagonal entry, which would allocate 1770 new memory through many mallocs and slow things down.
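(In the rectangular case the new diagonal entries may fall outside the
     existing nonzero pattern, which is why the MAT_NO_NEW_NONZERO_LOCATIONS
     family of options is rejected below.)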
1771 1772 Contributed by: Mathew Knepley 1773 */ 1774 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1775 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1776 if (diag && (l->A->M == l->A->N)) { 1777 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1778 } else if (diag) { 1779 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1780 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1781 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1782 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1783 } 1784 for (i=0; i<slen; i++) { 1785 row = lrows[i] + rstart_bs; 1786 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1787 } 1788 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1789 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1790 } else { 1791 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1792 } 1793 1794 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1795 ierr = PetscFree(lrows);CHKERRQ(ierr); 1796 1797 /* wait on sends */ 1798 if (nsends) { 1799 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1800 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1801 ierr = PetscFree(send_status);CHKERRQ(ierr); 1802 } 1803 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1804 ierr = PetscFree(svalues);CHKERRQ(ierr); 1805 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1811 int MatPrintHelp_MPIBAIJ(Mat A) 1812 { 1813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1814 MPI_Comm comm = A->comm; 1815 static int called = 0; 1816 int ierr; 1817 1818 PetscFunctionBegin; 1819 if (!a->rank) { 1820 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1821 } 1822 if (called) {PetscFunctionReturn(0);} else called = 1; 1823 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1824 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1830 int MatSetUnfactored_MPIBAIJ(Mat A) 1831 { 1832 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1833 int ierr; 1834 1835 PetscFunctionBegin; 1836 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatEqual_MPIBAIJ" 1844 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1845 { 1846 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1847 Mat a,b,c,d; 1848 PetscTruth flg; 1849 int ierr; 1850 1851 PetscFunctionBegin; 1852 a = matA->A; b = matA->B; 1853 c = matB->A; d = matB->B; 1854 1855 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1856 if (flg == PETSC_TRUE) { 1857 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1858 } 1859 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1866 int MatSetUpPreallocation_MPIBAIJ(Mat A) 1867 { 1868 int ierr; 1869 1870 PetscFunctionBegin; 1871 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 /* -------------------------------------------------------------------*/ 1876 
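#if 0
/* Illustrative sketch only (not compiled into the library): a typical way a
   caller drives the MatZeroRows() path implemented above, e.g. to impose
   Dirichlet boundary conditions. The row indices and the diagonal value
   here are hypothetical. */
static int MatZeroRowsUsageSketch(Mat A)
{
  IS          is;
  int         ierr,rows[3] = {0,1,2};  /* hypothetical global row numbers */
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr = ISCreateGeneral(PETSC_COMM_WORLD,3,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);  /* zero the rows, keep a unit diagonal */
  ierr = ISDestroy(is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif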
static struct _MatOps MatOps_Values = { 1877 MatSetValues_MPIBAIJ, 1878 MatGetRow_MPIBAIJ, 1879 MatRestoreRow_MPIBAIJ, 1880 MatMult_MPIBAIJ, 1881 /* 4*/ MatMultAdd_MPIBAIJ, 1882 MatMultTranspose_MPIBAIJ, 1883 MatMultTransposeAdd_MPIBAIJ, 1884 0, 1885 0, 1886 0, 1887 /*10*/ 0, 1888 0, 1889 0, 1890 0, 1891 MatTranspose_MPIBAIJ, 1892 /*15*/ MatGetInfo_MPIBAIJ, 1893 MatEqual_MPIBAIJ, 1894 MatGetDiagonal_MPIBAIJ, 1895 MatDiagonalScale_MPIBAIJ, 1896 MatNorm_MPIBAIJ, 1897 /*20*/ MatAssemblyBegin_MPIBAIJ, 1898 MatAssemblyEnd_MPIBAIJ, 1899 0, 1900 MatSetOption_MPIBAIJ, 1901 MatZeroEntries_MPIBAIJ, 1902 /*25*/ MatZeroRows_MPIBAIJ, 1903 0, 1904 0, 1905 0, 1906 0, 1907 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1908 0, 1909 0, 1910 0, 1911 0, 1912 /*35*/ MatDuplicate_MPIBAIJ, 1913 0, 1914 0, 1915 0, 1916 0, 1917 /*40*/ 0, 1918 MatGetSubMatrices_MPIBAIJ, 1919 MatIncreaseOverlap_MPIBAIJ, 1920 MatGetValues_MPIBAIJ, 1921 0, 1922 /*45*/ MatPrintHelp_MPIBAIJ, 1923 MatScale_MPIBAIJ, 1924 0, 1925 0, 1926 0, 1927 /*50*/ MatGetBlockSize_MPIBAIJ, 1928 0, 1929 0, 1930 0, 1931 0, 1932 /*55*/ 0, 1933 0, 1934 MatSetUnfactored_MPIBAIJ, 1935 0, 1936 MatSetValuesBlocked_MPIBAIJ, 1937 /*60*/ 0, 1938 MatDestroy_MPIBAIJ, 1939 MatView_MPIBAIJ, 1940 MatGetPetscMaps_Petsc, 1941 0, 1942 /*65*/ 0, 1943 0, 1944 0, 1945 0, 1946 0, 1947 /*70*/ MatGetRowMax_MPIBAIJ, 1948 0, 1949 0, 1950 0, 1951 0, 1952 /*75*/ 0, 1953 0, 1954 0, 1955 0, 1956 0, 1957 /*80*/ 0, 1958 0, 1959 0, 1960 0, 1961 0, 1962 /*85*/ MatLoad_MPIBAIJ 1963 }; 1964 1965 1966 EXTERN_C_BEGIN 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1969 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1970 { 1971 PetscFunctionBegin; 1972 *a = ((Mat_MPIBAIJ *)A->data)->A; 1973 *iscopy = PETSC_FALSE; 1974 PetscFunctionReturn(0); 1975 } 1976 EXTERN_C_END 1977 1978 EXTERN_C_BEGIN 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 1981 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1982 { 1983 Mat_MPIBAIJ *b; 1984 int ierr,i; 1985 1986 PetscFunctionBegin; 1987 B->preallocated = PETSC_TRUE; 1988 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1989 1990 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1991 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1992 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1993 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1994 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1995 if (d_nnz) { 1996 for (i=0; i<B->m/bs; i++) { 1997 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1998 } 1999 } 2000 if (o_nnz) { 2001 for (i=0; i<B->m/bs; i++) { 2002 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2003 } 2004 } 2005 2006 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2007 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2008 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2009 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2010 2011 b = (Mat_MPIBAIJ*)B->data; 2012 b->bs = bs; 2013 b->bs2 = bs*bs; 2014 b->mbs = B->m/bs; 2015 b->nbs = B->n/bs; 2016 b->Mbs = B->M/bs; 2017 b->Nbs = B->N/bs; 2018 2019 ierr =
MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2020 b->rowners[0] = 0; 2021 for (i=2; i<=b->size; i++) { 2022 b->rowners[i] += b->rowners[i-1]; 2023 } 2024 b->rstart = b->rowners[b->rank]; 2025 b->rend = b->rowners[b->rank+1]; 2026 2027 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2028 b->cowners[0] = 0; 2029 for (i=2; i<=b->size; i++) { 2030 b->cowners[i] += b->cowners[i-1]; 2031 } 2032 b->cstart = b->cowners[b->rank]; 2033 b->cend = b->cowners[b->rank+1]; 2034 2035 for (i=0; i<=b->size; i++) { 2036 b->rowners_bs[i] = b->rowners[i]*bs; 2037 } 2038 b->rstart_bs = b->rstart*bs; 2039 b->rend_bs = b->rend*bs; 2040 b->cstart_bs = b->cstart*bs; 2041 b->cend_bs = b->cend*bs; 2042 2043 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2044 PetscLogObjectParent(B,b->A); 2045 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2046 PetscLogObjectParent(B,b->B); 2047 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2048 2049 PetscFunctionReturn(0); 2050 } 2051 EXTERN_C_END 2052 2053 EXTERN_C_BEGIN 2054 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2055 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2056 EXTERN_C_END 2057 2058 /*MC 2059 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2060 2061 Options Database Keys: 2062 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2063 2064 Level: beginner 2065 2066 .seealso: MatCreateMPIBAIJ 2067 M*/ 2068 2069 EXTERN_C_BEGIN 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatCreate_MPIBAIJ" 2072 int MatCreate_MPIBAIJ(Mat B) 2073 { 2074 Mat_MPIBAIJ *b; 2075 int ierr; 2076 PetscTruth flg; 2077 2078 PetscFunctionBegin; 2079 2080 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2081 B->data = (void*)b; 2082 2083 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2084 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2085 B->mapping = 0; 2086 B->factor = 0; 2087 B->assembled = PETSC_FALSE; 2088 2089 B->insertmode = NOT_SET_VALUES; 2090 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2091 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2092 2093 /* build local table of row and column ownerships */ 2094 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2095 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2096 b->cowners = b->rowners + b->size + 2; 2097 b->rowners_bs = b->cowners + b->size + 2; 2098 2099 /* build cache for off array entries formed */ 2100 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2101 b->donotstash = PETSC_FALSE; 2102 b->colmap = PETSC_NULL; 2103 b->garray = PETSC_NULL; 2104 b->roworiented = PETSC_TRUE; 2105 2106 #if defined(PETSC_USE_MAT_SINGLE) 2107 /* stuff for MatSetValues_XXX in single precision */ 2108 b->setvalueslen = 0; 2109 b->setvaluescopy = PETSC_NULL; 2110 #endif 2111 2112 /* stuff used in block assembly */ 2113 b->barray = 0; 2114 2115 /* stuff used for matrix vector multiply */ 2116 b->lvec = 0; 2117 b->Mvctx = 0; 2118 2119 /* stuff for MatGetRow() */ 2120 b->rowindices = 0; 2121 b->rowvalues = 0; 2122 b->getrowactive = PETSC_FALSE; 2123 2124 /* hash table stuff */ 2125 b->ht = 0; 2126 b->hd = 0; 2127 b->ht_size = 0; 2128 b->ht_flag = PETSC_FALSE; 2129 b->ht_fact = 0; 2130 b->ht_total_ct = 0; 2131 b->ht_insert_ct = 0; 2132 2133 ierr 
= PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2134 if (flg) { 2135 PetscReal fact = 1.39; 2136 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2137 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2138 if (fact <= 1.0) fact = 1.39; 2139 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2140 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2141 } 2142 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2143 "MatStoreValues_MPIBAIJ", 2144 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2145 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2146 "MatRetrieveValues_MPIBAIJ", 2147 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2148 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2149 "MatGetDiagonalBlock_MPIBAIJ", 2150 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2151 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2152 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2153 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2154 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2155 "MatDiagonalScaleLocal_MPIBAIJ", 2156 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2157 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2158 "MatSetHashTableFactor_MPIBAIJ", 2159 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 EXTERN_C_END 2163 2164 /*MC 2165 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2166 2167 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2168 and MATMPIBAIJ otherwise. 2169 2170 Options Database Keys: 2171 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2172 2173 Level: beginner 2174 2175 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ 2176 M*/ 2177 2178 EXTERN_C_BEGIN 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatCreate_BAIJ" 2181 int MatCreate_BAIJ(Mat A) { 2182 int ierr,size; 2183 2184 PetscFunctionBegin; 2185 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2186 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2187 if (size == 1) { 2188 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2189 } else { 2190 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 EXTERN_C_END 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2198 /*@C 2199 MatMPIBAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in block AIJ format 2200 (block compressed row). For good matrix assembly performance 2201 the user should preallocate the matrix storage by setting the parameters 2202 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2203 performance can be increased by more than a factor of 50. 2204 2205 Collective on Mat 2206 2207 Input Parameters: 2208 + A - the matrix 2209 . bs - size of block 2210 . d_nz - number of block nonzeros per block row in diagonal portion of local 2211 submatrix (same for all local rows) 2212 . d_nnz - array containing the number of block nonzeros in the various block rows 2213 in the diagonal portion of the local submatrix (possibly different for each block 2214 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2215 .
o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2216 submatrix (same for all local rows). 2217 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2218 off-diagonal portion of the local submatrix (possibly different for 2219 each block row) or PETSC_NULL. 2220 2221 2222 2223 2224 Options Database Keys: 2225 + -mat_no_unroll - uses code that does not unroll the loops in the 2226 block calculations (much slower) 2227 - -mat_block_size - size of the blocks to use 2228 2229 Notes: 2230 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2231 then it must be used on all processors that share the object for that argument. 2232 2233 Storage Information: 2234 For a square global matrix we define each processor's diagonal portion 2235 to be its local rows and the corresponding columns (a square submatrix); 2236 each processor's off-diagonal portion encompasses the remainder of the 2237 local matrix (a rectangular submatrix). 2238 2239 The user can specify preallocated storage for the diagonal part of 2240 the local submatrix with either d_nz or d_nnz (not both). Set 2241 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2242 memory allocation. Likewise, specify preallocated storage for the 2243 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2244 2245 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2246 the figure below we depict these three local rows and all columns (0-11). 2247 2248 .vb 2249 0 1 2 3 4 5 6 7 8 9 10 11 2250 ------------------- 2251 row 3 | o o o d d d o o o o o o 2252 row 4 | o o o d d d o o o o o o 2253 row 5 | o o o d d d o o o o o o 2254 ------------------- 2255 .ve 2256 2257 Thus, any entries in the d locations are stored in the d (diagonal) 2258 submatrix, and any entries in the o locations are stored in the 2259 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2260 stored simply in the MATSEQBAIJ format for compressed row storage. 2261 2262 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2263 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2264 In general, for PDE problems in which most nonzeros are near the diagonal, 2265 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2266 or you will get TERRIBLE performance; see the users' manual chapter on 2267 matrices. 2268 2269 Level: intermediate 2270 2271 .keywords: matrix, block, aij, compressed row, sparse, parallel 2272 2273 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2274 @*/ 2275 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2276 { 2277 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2278 2279 PetscFunctionBegin; 2280 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2281 if (f) { 2282 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2283 } 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatCreateMPIBAIJ" 2289 /*@C 2290 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2291 (block compressed row). For good matrix assembly performance 2292 the user should preallocate the matrix storage by setting the parameters 2293 d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 2294 performance can be increased by more than a factor of 50. 2295 2296 Collective on MPI_Comm 2297 2298 Input Parameters: 2299 + comm - MPI communicator 2300 . bs - size of block 2301 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2302 This value should be the same as the local size used in creating the 2303 y vector for the matrix-vector product y = Ax. 2304 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2305 This value should be the same as the local size used in creating the 2306 x vector for the matrix-vector product y = Ax. 2307 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2308 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2309 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2310 submatrix (same for all local rows) 2311 . d_nnz - array containing the number of nonzero blocks in the various block rows 2312 in the diagonal portion of the local submatrix (possibly different for each block 2313 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2314 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2315 submatrix (same for all local rows). 2316 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2317 off-diagonal portion of the local submatrix (possibly different for 2318 each block row) or PETSC_NULL. 2319 2320 Output Parameter: 2321 . A - the matrix 2322 2323 Options Database Keys: 2324 + -mat_no_unroll - uses code that does not unroll the loops in the 2325 block calculations (much slower) 2326 - -mat_block_size - size of the blocks to use 2327 2328 Notes: 2329 A nonzero block is any block that has one or more nonzeros in it 2330 2331 The user MUST specify either the local or global matrix dimensions 2332 (possibly both). 2333 2334 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2335 then it must be used on all processors that share the object for that argument. 2336 2337 Storage Information: 2338 For a square global matrix we define each processor's diagonal portion 2339 to be its local rows and the corresponding columns (a square submatrix); 2340 each processor's off-diagonal portion encompasses the remainder of the 2341 local matrix (a rectangular submatrix). 2342 2343 The user can specify preallocated storage for the diagonal part of 2344 the local submatrix with either d_nz or d_nnz (not both). Set 2345 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2346 memory allocation. Likewise, specify preallocated storage for the 2347 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2348 2349 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2350 the figure below we depict these three local rows and all columns (0-11). 2351 2352 .vb 2353 0 1 2 3 4 5 6 7 8 9 10 11 2354 ------------------- 2355 row 3 | o o o d d d o o o o o o 2356 row 4 | o o o d d d o o o o o o 2357 row 5 | o o o d d d o o o o o o 2358 ------------------- 2359 .ve 2360 2361 Thus, any entries in the d locations are stored in the d (diagonal) 2362 submatrix, and any entries in the o locations are stored in the 2363 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2364 stored simply in the MATSEQBAIJ format for compressed row storage.
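   As a concrete (purely illustrative) sketch for the layout above with bs=1,
   the owner of rows 3, 4 and 5 could preallocate exactly by reading the
   counts off the figure:

.vb
      int d_nnz[3] = {3,3,3};   3 d blocks in each of the 3 local rows
      int o_nnz[3] = {9,9,9};   9 o blocks in each of the 3 local rows
      ierr = MatCreateMPIBAIJ(comm,1,3,3,12,12,0,d_nnz,0,o_nnz,&A);
.ve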
2365 2366 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2367 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2368 In general, for PDE problems in which most nonzeros are near the diagonal, 2369 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2370 or you will get TERRIBLE performance; see the users' manual chapter on 2371 matrices. 2372 2373 Level: intermediate 2374 2375 .keywords: matrix, block, aij, compressed row, sparse, parallel 2376 2377 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2378 @*/ 2379 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2380 { 2381 int ierr,size; 2382 2383 PetscFunctionBegin; 2384 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2385 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2386 if (size > 1) { 2387 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2388 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2389 } else { 2390 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2391 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2392 } 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2398 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2399 { 2400 Mat mat; 2401 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2402 int ierr,len=0; 2403 2404 PetscFunctionBegin; 2405 *newmat = 0; 2406 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2407 ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr); 2408 mat->preallocated = PETSC_TRUE; 2409 mat->assembled = PETSC_TRUE; 2410 a = (Mat_MPIBAIJ*)mat->data; 2411 a->bs = oldmat->bs; 2412 a->bs2 = oldmat->bs2; 2413 a->mbs = oldmat->mbs; 2414 a->nbs = oldmat->nbs; 2415 a->Mbs = oldmat->Mbs; 2416 a->Nbs = oldmat->Nbs; 2417 2418 a->rstart = oldmat->rstart; 2419 a->rend = oldmat->rend; 2420 a->cstart = oldmat->cstart; 2421 a->cend = oldmat->cend; 2422 a->size = oldmat->size; 2423 a->rank = oldmat->rank; 2424 a->donotstash = oldmat->donotstash; 2425 a->roworiented = oldmat->roworiented; 2426 a->rowindices = 0; 2427 a->rowvalues = 0; 2428 a->getrowactive = PETSC_FALSE; 2429 a->barray = 0; 2430 a->rstart_bs = oldmat->rstart_bs; 2431 a->rend_bs = oldmat->rend_bs; 2432 a->cstart_bs = oldmat->cstart_bs; 2433 a->cend_bs = oldmat->cend_bs; 2434 2435 /* hash table stuff */ 2436 a->ht = 0; 2437 a->hd = 0; 2438 a->ht_size = 0; 2439 a->ht_flag = oldmat->ht_flag; 2440 a->ht_fact = oldmat->ht_fact; 2441 a->ht_total_ct = 0; 2442 a->ht_insert_ct = 0; 2443 2444 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2445 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2446 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2447 if (oldmat->colmap) { 2448 #if defined (PETSC_USE_CTABLE) 2449 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2450 #else 2451 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2452 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2453 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2454 #endif 2455 } else a->colmap = 0; 2456 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2457 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2458 
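/* garray[] translates the local block columns of the off-diagonal part B
   into global block column numbers, so it is duplicated along with B */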
PetscLogObjectMemory(mat,len*sizeof(int)); 2459 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2460 } else a->garray = 0; 2461 2462 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2463 PetscLogObjectParent(mat,a->lvec); 2464 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2465 2466 PetscLogObjectParent(mat,a->Mvctx); 2467 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2468 PetscLogObjectParent(mat,a->A); 2469 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2470 PetscLogObjectParent(mat,a->B); 2471 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2472 *newmat = mat; 2473 PetscFunctionReturn(0); 2474 } 2475 2476 #include "petscsys.h" 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatLoad_MPIBAIJ" 2480 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2481 { 2482 Mat A; 2483 int i,nz,ierr,j,rstart,rend,fd; 2484 PetscScalar *vals,*buf; 2485 MPI_Comm comm = ((PetscObject)viewer)->comm; 2486 MPI_Status status; 2487 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2488 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2489 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2490 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2491 int dcount,kmax,k,nzcount,tmp; 2492 2493 PetscFunctionBegin; 2494 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2495 2496 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2497 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2498 if (!rank) { 2499 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2500 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2501 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2502 if (header[3] < 0) { 2503 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2504 } 2505 } 2506 2507 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2508 M = header[1]; N = header[2]; 2509 2510 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2511 2512 /* 2513 This code adds extra rows to make sure the number of rows is 2514 divisible by the blocksize 2515 */ 2516 Mbs = M/bs; 2517 extra_rows = bs - M + bs*(Mbs); 2518 if (extra_rows == bs) extra_rows = 0; 2519 else Mbs++; 2520 if (extra_rows &&!rank) { 2521 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2522 } 2523 2524 /* determine ownership of all rows */ 2525 mbs = Mbs/size + ((Mbs % size) > rank); 2526 m = mbs*bs; 2527 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2528 browners = rowners + size + 1; 2529 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2530 rowners[0] = 0; 2531 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2532 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2533 rstart = rowners[rank]; 2534 rend = rowners[rank+1]; 2535 2536 /* distribute row lengths to all processors */ 2537 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2538 if (!rank) { 2539 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2540 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2541 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2542 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2543 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2544 ierr = 
MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2545 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2546 } else { 2547 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2548 } 2549 2550 if (!rank) { 2551 /* calculate the number of nonzeros on each processor */ 2552 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2553 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2554 for (i=0; i<size; i++) { 2555 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2556 procsnz[i] += rowlengths[j]; 2557 } 2558 } 2559 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2560 2561 /* determine max buffer needed and allocate it */ 2562 maxnz = 0; 2563 for (i=0; i<size; i++) { 2564 maxnz = PetscMax(maxnz,procsnz[i]); 2565 } 2566 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2567 2568 /* read in my part of the matrix column indices */ 2569 nz = procsnz[0]; 2570 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2571 mycols = ibuf; 2572 if (size == 1) nz -= extra_rows; 2573 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2574 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2575 2576 /* read in each of the other processors' parts (except the last) and ship them off */ 2577 for (i=1; i<size-1; i++) { 2578 nz = procsnz[i]; 2579 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2580 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2581 } 2582 /* read in the stuff for the last proc */ 2583 if (size != 1) { 2584 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2585 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2586 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2587 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2588 } 2589 ierr = PetscFree(cols);CHKERRQ(ierr); 2590 } else { 2591 /* determine buffer space needed for message */ 2592 nz = 0; 2593 for (i=0; i<m; i++) { 2594 nz += locrowlens[i]; 2595 } 2596 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2597 mycols = ibuf; 2598 /* receive message of column indices */ 2599 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2600 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2601 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2602 } 2603 2604 /* loop over local rows, determining number of off diagonal entries */ 2605 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2606 odlens = dlens + (rend-rstart); 2607 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2608 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2609 masked1 = mask + Mbs; 2610 masked2 = masked1 + Mbs; 2611 rowcount = 0; nzcount = 0; 2612 for (i=0; i<mbs; i++) { 2613 dcount = 0; 2614 odcount = 0; 2615 for (j=0; j<bs; j++) { 2616 kmax = locrowlens[rowcount]; 2617 for (k=0; k<kmax; k++) { 2618 tmp = mycols[nzcount++]/bs; 2619 if (!mask[tmp]) { 2620 mask[tmp] = 1; 2621 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2622 else masked1[dcount++] = tmp; 2623 } 2624 } 2625 rowcount++; 2626 } 2627 2628 dlens[i] = dcount; 2629 odlens[i] = odcount; 2630 2631 /* zero out the mask elements we set */ 2632 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2633 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2634 } 2635 2636 /* create our matrix */ 2637 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2638 ierr = MatSetType(A,type);CHKERRQ(ierr); 2639 ierr =
MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2640 2641 /* the file stores each row's column indices in sorted order */ 2642 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2643 2644 if (!rank) { 2645 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2646 /* read in my part of the matrix numerical values */ 2647 nz = procsnz[0]; 2648 vals = buf; 2649 mycols = ibuf; 2650 if (size == 1) nz -= extra_rows; 2651 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2652 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2653 2654 /* insert into matrix */ 2655 jj = rstart*bs; 2656 for (i=0; i<m; i++) { 2657 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2658 mycols += locrowlens[i]; 2659 vals += locrowlens[i]; 2660 jj++; 2661 } 2662 /* read in other processors (except the last one) and ship out */ 2663 for (i=1; i<size-1; i++) { 2664 nz = procsnz[i]; 2665 vals = buf; 2666 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2667 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2668 } 2669 /* the last proc */ 2670 if (size != 1) { 2671 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2672 vals = buf; 2673 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2674 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2675 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2676 } 2677 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2678 } else { 2679 /* receive numeric values */ 2680 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2681 2682 /* receive message of values */ 2683 vals = buf; 2684 mycols = ibuf; 2685 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2686 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2687 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2688 2689 /* insert into matrix */ 2690 jj = rstart*bs; 2691 for (i=0; i<m; i++) { 2692 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2693 mycols += locrowlens[i]; 2694 vals += locrowlens[i]; 2695 jj++; 2696 } 2697 } 2698 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2699 ierr = PetscFree(buf);CHKERRQ(ierr); 2700 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2701 ierr = PetscFree(rowners);CHKERRQ(ierr); 2702 ierr = PetscFree(dlens);CHKERRQ(ierr); 2703 ierr = PetscFree(mask);CHKERRQ(ierr); 2704 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 2707 *newmat = A; 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2713 /*@ 2714 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2715 2716 Input Parameters: 2717 + mat - the matrix 2718 -
fact - factor 2719 2720 Collective on Mat 2721 2722 Level: advanced 2723 2724 Notes: 2725 This can also be set by the command line option: -mat_use_hash_table fact 2726 2727 .keywords: matrix, hashtable, factor, HT 2728 2729 .seealso: MatSetOption() 2730 @*/ 2731 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2732 { 2733 int ierr,(*f)(Mat,PetscReal); 2734 2735 PetscFunctionBegin; 2736 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2737 if (f) { 2738 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2739 } 2740 PetscFunctionReturn(0); 2741 } 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2745 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2746 { 2747 Mat_MPIBAIJ *baij; 2748 2749 PetscFunctionBegin; 2750 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2751 baij = (Mat_MPIBAIJ*)mat->data; 2752 baij->ht_fact = fact; 2753 PetscFunctionReturn(0); 2754 } 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2758 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2759 { 2760 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2761 PetscFunctionBegin; 2762 *Ad = a->A; 2763 *Ao = a->B; 2764 *colmap = a->garray; 2765 PetscFunctionReturn(0); 2766 } 2767
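#if 0
/* Illustrative sketch only (not compiled into the library): how the accessor
   above is meant to be used. Ad receives the diagonal block, Ao the
   off-diagonal block, and colmap the local-to-global block column map of Ao.
   None of the three is a copy, so the caller must not destroy or free them. */
static int MatMPIBAIJGetSeqBAIJUsageSketch(Mat A)
{
  Mat Ad,Ao;
  int ierr,*colmap;

  PetscFunctionBegin;
  ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  /* for example, look only at the locally owned (diagonal) block */
  ierr = MatView(Ad,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif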