1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN int DisAssemble_MPIBAIJ(Mat); 8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 15 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 24 */ 25 #if defined(PETSC_USE_MAT_SINGLE) 26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 31 #else 32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 33 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 35 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 37 #endif 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 42 { 43 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 44 int ierr,i; 45 PetscScalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->m; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 int MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 int ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 int MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 int ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 static int CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 int nbs = B->nbs,i,bs=B->bs,ierr; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 120 PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (int*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (int*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatSetValues_MPIBAIJ" 285 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 int ierr,i,N = m*n; 289 MatScalar *vsingle; 290 291 PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 294 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 299 for (i=0; i<N; i++) { 300 vsingle[i] = v[i]; 301 } 302 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 int ierr,i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 if (N > b->setvalueslen) { 316 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 317 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 318 b->setvalueslen = N; 319 } 320 vsingle = b->setvaluescopy; 321 for (i=0; i<N; i++) { 322 vsingle[i] = v[i]; 323 } 324 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 331 { 332 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 333 int ierr,i,N = m*n; 334 MatScalar *vsingle; 335 336 PetscFunctionBegin; 337 if (N > b->setvalueslen) { 338 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 339 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 340 b->setvalueslen = N; 341 } 342 vsingle = b->setvaluescopy; 343 for (i=0; i<N; i++) { 344 vsingle[i] = v[i]; 345 } 346 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 353 { 354 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 355 int ierr,i,N = m*n*b->bs2; 356 MatScalar *vsingle; 357 358 PetscFunctionBegin; 359 if (N > b->setvalueslen) { 360 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 361 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 362 b->setvalueslen = N; 363 } 364 vsingle = b->setvaluescopy; 365 for (i=0; i<N; i++) { 366 vsingle[i] = v[i]; 367 } 368 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 #endif 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 376 { 377 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 378 MatScalar value; 379 PetscTruth roworiented = baij->roworiented; 380 int ierr,i,j,row,col; 381 int rstart_orig=baij->rstart_bs; 382 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 383 int cend_orig=baij->cend_bs,bs=baij->bs; 384 385 /* Some Variables required in the macro */ 386 Mat A = baij->A; 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 388 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 389 MatScalar *aa=a->a; 390 391 Mat B = baij->B; 392 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 393 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 394 MatScalar *ba=b->a; 395 396 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 397 int low,high,t,ridx,cidx,bs2=a->bs2; 398 MatScalar *ap,*bap; 399 400 PetscFunctionBegin; 401 for (i=0; i<m; i++) { 402 if (im[i] < 0) continue; 403 #if defined(PETSC_USE_BOPT_g) 404 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 405 #endif 406 if (im[i] >= rstart_orig && im[i] < rend_orig) { 407 row = im[i] - rstart_orig; 408 for (j=0; j<n; j++) { 409 if (in[j] >= cstart_orig && in[j] < cend_orig){ 410 col = in[j] - cstart_orig; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 413 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 414 } else if (in[j] < 0) continue; 415 #if defined(PETSC_USE_BOPT_g) 416 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);} 417 #endif 418 else { 419 if (mat->was_assembled) { 420 if (!baij->colmap) { 421 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 422 } 423 #if defined (PETSC_USE_CTABLE) 424 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 425 col = col - 1; 426 #else 427 col = baij->colmap[in[j]/bs] - 1; 428 #endif 429 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 430 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 431 col = in[j]; 432 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 433 B = baij->B; 434 b = (Mat_SeqBAIJ*)(B)->data; 435 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 436 ba=b->a; 437 } else col += in[j]%bs; 438 } else col = in[j]; 439 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 440 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 441 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 442 } 443 } 444 } else { 445 if (!baij->donotstash) { 446 if (roworiented) { 447 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 448 } else { 449 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 450 } 451 } 452 } 453 } 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 459 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 460 { 461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 462 const MatScalar *value; 463 MatScalar *barray=baij->barray; 464 PetscTruth roworiented = baij->roworiented; 465 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 466 int rend=baij->rend,cstart=baij->cstart,stepval; 467 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 468 469 PetscFunctionBegin; 470 if(!barray) { 471 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 472 baij->barray = barray; 473 } 474 475 if (roworiented) { 476 stepval = (n-1)*bs; 477 } else { 478 stepval = (m-1)*bs; 479 } 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_BOPT_g) 483 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 for (j=0; j<n; j++) { 488 /* If NumCol = 1 then a copy is not required */ 489 if ((roworiented) && (n == 1)) { 490 barray = (MatScalar*)v + i*bs2; 491 } else if((!roworiented) && (m == 1)) { 492 barray = (MatScalar*)v + j*bs2; 493 } else { /* Here a copy is required */ 494 if (roworiented) { 495 value = v + i*(stepval+bs)*bs + j*bs; 496 } else { 497 value = v + j*(stepval+bs)*bs + i*bs; 498 } 499 for (ii=0; ii<bs; ii++,value+=stepval) { 500 for (jj=0; jj<bs; jj++) { 501 *barray++ = *value++; 502 } 503 } 504 barray -=bs2; 505 } 506 507 if (in[j] >= cstart && in[j] < cend){ 508 col = in[j] - cstart; 509 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 510 } 511 else if (in[j] < 0) continue; 512 #if defined(PETSC_USE_BOPT_g) 513 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 514 #endif 515 else { 516 if (mat->was_assembled) { 517 if (!baij->colmap) { 518 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 519 } 520 521 #if defined(PETSC_USE_BOPT_g) 522 #if defined (PETSC_USE_CTABLE) 523 { int data; 524 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 525 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 526 } 527 #else 528 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 529 #endif 530 #endif 531 #if defined (PETSC_USE_CTABLE) 532 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 533 col = (col - 1)/bs; 534 #else 535 col = (baij->colmap[in[j]] - 1)/bs; 536 #endif 537 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 538 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 539 col = in[j]; 540 } 541 } 542 else col = in[j]; 543 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 544 } 545 } 546 } else { 547 if (!baij->donotstash) { 548 if (roworiented) { 549 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 550 } else { 551 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 PetscFunctionReturn(0); 557 } 558 559 #define HASH_KEY 0.6180339887 560 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 561 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 565 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 566 { 567 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 568 PetscTruth roworiented = baij->roworiented; 569 int ierr,i,j,row,col; 570 int rstart_orig=baij->rstart_bs; 571 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 572 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 573 PetscReal tmp; 574 MatScalar **HD = baij->hd,value; 575 #if defined(PETSC_USE_BOPT_g) 576 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 577 #endif 578 579 PetscFunctionBegin; 580 581 for (i=0; i<m; i++) { 582 #if defined(PETSC_USE_BOPT_g) 583 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 584 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 585 #endif 586 row = im[i]; 587 if (row >= rstart_orig && row < rend_orig) { 588 for (j=0; j<n; j++) { 589 col = in[j]; 590 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 591 /* Look up into the Hash Table */ 592 key = (row/bs)*Nbs+(col/bs)+1; 593 h1 = HASH(size,key,tmp); 594 595 596 idx = h1; 597 #if defined(PETSC_USE_BOPT_g) 598 insert_ct++; 599 total_ct++; 600 if (HT[idx] != key) { 601 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 602 if (idx == size) { 603 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 604 if (idx == h1) { 605 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 606 } 607 } 608 } 609 #else 610 if (HT[idx] != key) { 611 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 612 if (idx == size) { 613 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 614 if (idx == h1) { 615 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 616 } 617 } 618 } 619 #endif 620 /* A HASH table entry is found, so insert the values at the correct address */ 621 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 622 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 623 } 624 } else { 625 if (!baij->donotstash) { 626 if (roworiented) { 627 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 628 } else { 629 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 630 } 631 } 632 } 633 } 634 #if defined(PETSC_USE_BOPT_g) 635 baij->ht_total_ct = total_ct; 636 baij->ht_insert_ct = insert_ct; 637 #endif 638 PetscFunctionReturn(0); 639 } 640 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 643 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 644 { 645 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 646 PetscTruth roworiented = baij->roworiented; 647 int ierr,i,j,ii,jj,row,col; 648 int rstart=baij->rstart ; 649 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 650 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 651 PetscReal tmp; 652 MatScalar **HD = baij->hd,*baij_a; 653 const MatScalar *v_t,*value; 654 #if defined(PETSC_USE_BOPT_g) 655 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 656 #endif 657 658 PetscFunctionBegin; 659 660 if (roworiented) { 661 stepval = (n-1)*bs; 662 } else { 663 stepval = (m-1)*bs; 664 } 665 for (i=0; i<m; i++) { 666 #if defined(PETSC_USE_BOPT_g) 667 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]); 668 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1); 669 #endif 670 row = im[i]; 671 v_t = v + i*bs2; 672 if (row >= rstart && row < rend) { 673 for (j=0; j<n; j++) { 674 col = in[j]; 675 676 /* Look up into the Hash Table */ 677 key = row*Nbs+col+1; 678 h1 = HASH(size,key,tmp); 679 680 idx = h1; 681 #if defined(PETSC_USE_BOPT_g) 682 total_ct++; 683 insert_ct++; 684 if (HT[idx] != key) { 685 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 686 if (idx == size) { 687 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 688 if (idx == h1) { 689 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 690 } 691 } 692 } 693 #else 694 if (HT[idx] != key) { 695 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 696 if (idx == size) { 697 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 698 if (idx == h1) { 699 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 700 } 701 } 702 } 703 #endif 704 baij_a = HD[idx]; 705 if (roworiented) { 706 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 707 /* value = v + (i*(stepval+bs)+j)*bs; */ 708 value = v_t; 709 v_t += bs; 710 if (addv == ADD_VALUES) { 711 for (ii=0; ii<bs; ii++,value+=stepval) { 712 for (jj=ii; jj<bs2; jj+=bs) { 713 baij_a[jj] += *value++; 714 } 715 } 716 } else { 717 for (ii=0; ii<bs; ii++,value+=stepval) { 718 for (jj=ii; jj<bs2; jj+=bs) { 719 baij_a[jj] = *value++; 720 } 721 } 722 } 723 } else { 724 value = v + j*(stepval+bs)*bs + i*bs; 725 if (addv == ADD_VALUES) { 726 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 727 for (jj=0; jj<bs; jj++) { 728 baij_a[jj] += *value++; 729 } 730 } 731 } else { 732 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 733 for (jj=0; jj<bs; jj++) { 734 baij_a[jj] = *value++; 735 } 736 } 737 } 738 } 739 } 740 } else { 741 if (!baij->donotstash) { 742 if (roworiented) { 743 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 744 } else { 745 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 746 } 747 } 748 } 749 } 750 #if defined(PETSC_USE_BOPT_g) 751 baij->ht_total_ct = total_ct; 752 baij->ht_insert_ct = insert_ct; 753 #endif 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatGetValues_MPIBAIJ" 759 int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 760 { 761 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 762 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 763 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 764 765 PetscFunctionBegin; 766 for (i=0; i<m; i++) { 767 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 768 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 769 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 770 row = idxm[i] - bsrstart; 771 for (j=0; j<n; j++) { 772 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 773 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 774 if (idxn[j] >= bscstart && idxn[j] < bscend){ 775 col = idxn[j] - bscstart; 776 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 777 } else { 778 if (!baij->colmap) { 779 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 780 } 781 #if defined (PETSC_USE_CTABLE) 782 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 783 data --; 784 #else 785 data = baij->colmap[idxn[j]/bs]-1; 786 #endif 787 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 788 else { 789 col = data + idxn[j]%bs; 790 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else { 795 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 796 } 797 } 798 PetscFunctionReturn(0); 799 } 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatNorm_MPIBAIJ" 803 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 804 { 805 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 806 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 807 int ierr,i,bs2=baij->bs2; 808 PetscReal sum = 0.0; 809 MatScalar *v; 810 811 PetscFunctionBegin; 812 if (baij->size == 1) { 813 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 814 } else { 815 if (type == NORM_FROBENIUS) { 816 v = amat->a; 817 for (i=0; i<amat->nz*bs2; i++) { 818 #if defined(PETSC_USE_COMPLEX) 819 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 820 #else 821 sum += (*v)*(*v); v++; 822 #endif 823 } 824 v = bmat->a; 825 for (i=0; i<bmat->nz*bs2; i++) { 826 #if defined(PETSC_USE_COMPLEX) 827 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 828 #else 829 sum += (*v)*(*v); v++; 830 #endif 831 } 832 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 833 *nrm = sqrt(*nrm); 834 } else { 835 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836 } 837 } 838 PetscFunctionReturn(0); 839 } 840 841 842 /* 843 Creates the hash table, and sets the table 844 This table is created only once. 845 If new entried need to be added to the matrix 846 then the hash table has to be destroyed and 847 recreated. 848 */ 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 851 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 852 { 853 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 854 Mat A = baij->A,B=baij->B; 855 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 856 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 857 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 858 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859 int *HT,key; 860 MatScalar **HD; 861 PetscReal tmp; 862 #if defined(PETSC_USE_BOPT_g) 863 int ct=0,max=0; 864 #endif 865 866 PetscFunctionBegin; 867 baij->ht_size=(int)(factor*nz); 868 size = baij->ht_size; 869 870 if (baij->ht) { 871 PetscFunctionReturn(0); 872 } 873 874 /* Allocate Memory for Hash Table */ 875 ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876 baij->ht = (int*)(baij->hd + size); 877 HD = baij->hd; 878 HT = baij->ht; 879 880 881 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 882 883 884 /* Loop Over A */ 885 for (i=0; i<a->mbs; i++) { 886 for (j=ai[i]; j<ai[i+1]; j++) { 887 row = i+rstart; 888 col = aj[j]+cstart; 889 890 key = row*Nbs + col + 1; 891 h1 = HASH(size,key,tmp); 892 for (k=0; k<size; k++){ 893 if (HT[(h1+k)%size] == 0.0) { 894 HT[(h1+k)%size] = key; 895 HD[(h1+k)%size] = a->a + j*bs2; 896 break; 897 #if defined(PETSC_USE_BOPT_g) 898 } else { 899 ct++; 900 #endif 901 } 902 } 903 #if defined(PETSC_USE_BOPT_g) 904 if (k> max) max = k; 905 #endif 906 } 907 } 908 /* Loop Over B */ 909 for (i=0; i<b->mbs; i++) { 910 for (j=bi[i]; j<bi[i+1]; j++) { 911 row = i+rstart; 912 col = garray[bj[j]]; 913 key = row*Nbs + col + 1; 914 h1 = HASH(size,key,tmp); 915 for (k=0; k<size; k++){ 916 if (HT[(h1+k)%size] == 0.0) { 917 HT[(h1+k)%size] = key; 918 HD[(h1+k)%size] = b->a + j*bs2; 919 break; 920 #if defined(PETSC_USE_BOPT_g) 921 } else { 922 ct++; 923 #endif 924 } 925 } 926 #if defined(PETSC_USE_BOPT_g) 927 if (k> max) max = k; 928 #endif 929 } 930 } 931 932 /* Print Summary */ 933 #if defined(PETSC_USE_BOPT_g) 934 for (i=0,j=0; i<size; i++) { 935 if (HT[i]) {j++;} 936 } 937 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 938 #endif 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945 { 946 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947 int ierr,nstash,reallocs; 948 InsertMode addv; 949 950 PetscFunctionBegin; 951 if (baij->donotstash) { 952 PetscFunctionReturn(0); 953 } 954 955 /* make sure all processors are either in INSERTMODE or ADDMODE */ 956 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 957 if (addv == (ADD_VALUES|INSERT_VALUES)) { 958 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 959 } 960 mat->insertmode = addv; /* in case this processor had no cache */ 961 962 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 963 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 964 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 965 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 966 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 967 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 968 PetscFunctionReturn(0); 969 } 970 971 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 974 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 975 { 976 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 977 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 978 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 979 int *row,*col,other_disassembled; 980 PetscTruth r1,r2,r3; 981 MatScalar *val; 982 InsertMode addv = mat->insertmode; 983 #if defined(PETSC_HAVE_DSCPACK) 984 PetscTruth flag; 985 #endif 986 987 PetscFunctionBegin; 988 if (!baij->donotstash) { 989 while (1) { 990 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 991 if (!flg) break; 992 993 for (i=0; i<n;) { 994 /* Now identify the consecutive vals belonging to the same row */ 995 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 996 if (j < n) ncols = j-i; 997 else ncols = n-i; 998 /* Now assemble all these values with a single function call */ 999 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1000 i = j; 1001 } 1002 } 1003 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1004 /* Now process the block-stash. Since the values are stashed column-oriented, 1005 set the roworiented flag to column oriented, and after MatSetValues() 1006 restore the original flags */ 1007 r1 = baij->roworiented; 1008 r2 = a->roworiented; 1009 r3 = b->roworiented; 1010 baij->roworiented = PETSC_FALSE; 1011 a->roworiented = PETSC_FALSE; 1012 b->roworiented = PETSC_FALSE; 1013 while (1) { 1014 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1015 if (!flg) break; 1016 1017 for (i=0; i<n;) { 1018 /* Now identify the consecutive vals belonging to the same row */ 1019 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1020 if (j < n) ncols = j-i; 1021 else ncols = n-i; 1022 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1023 i = j; 1024 } 1025 } 1026 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1027 baij->roworiented = r1; 1028 a->roworiented = r2; 1029 b->roworiented = r3; 1030 } 1031 1032 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1033 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1034 1035 /* determine if any processor has disassembled, if so we must 1036 also disassemble ourselfs, in order that we may reassemble. */ 1037 /* 1038 if nonzero structure of submatrix B cannot change then we know that 1039 no processor disassembled thus we can skip this stuff 1040 */ 1041 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1042 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1043 if (mat->was_assembled && !other_disassembled) { 1044 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1045 } 1046 } 1047 1048 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1049 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1050 } 1051 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1053 1054 #if defined(PETSC_USE_BOPT_g) 1055 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1056 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1057 baij->ht_total_ct = 0; 1058 baij->ht_insert_ct = 0; 1059 } 1060 #endif 1061 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1062 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1063 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1064 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1065 } 1066 1067 if (baij->rowvalues) { 1068 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1069 baij->rowvalues = 0; 1070 } 1071 #if defined(PETSC_HAVE_DSCPACK) 1072 ierr = PetscOptionsHasName(mat->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1073 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(mat);CHKERRQ(ierr); } 1074 #endif 1075 PetscFunctionReturn(0); 1076 } 1077 1078 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1082 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1083 { 1084 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1085 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 1086 PetscTruth isascii,isdraw; 1087 PetscViewer sviewer; 1088 PetscViewerFormat format; 1089 1090 PetscFunctionBegin; 1091 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1092 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1093 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1094 if (isascii) { 1095 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1096 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1097 MatInfo info; 1098 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1099 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1100 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1101 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1102 baij->bs,(int)info.memory);CHKERRQ(ierr); 1103 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1105 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1106 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1107 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1108 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1111 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1114 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 1115 ierr = MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);CHKERRQ(ierr); 1116 #endif 1117 PetscFunctionReturn(0); 1118 } 1119 } 1120 1121 if (isdraw) { 1122 PetscDraw draw; 1123 PetscTruth isnull; 1124 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1125 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1126 } 1127 1128 if (size == 1) { 1129 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1130 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1131 } else { 1132 /* assemble the entire matrix onto first processor. */ 1133 Mat A; 1134 Mat_SeqBAIJ *Aloc; 1135 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1136 MatScalar *a; 1137 1138 if (!rank) { 1139 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1140 } else { 1141 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1142 } 1143 PetscLogObjectParent(mat,A); 1144 1145 /* copy over the A part */ 1146 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1147 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1149 1150 for (i=0; i<mbs; i++) { 1151 rvals[0] = bs*(baij->rstart + i); 1152 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1153 for (j=ai[i]; j<ai[i+1]; j++) { 1154 col = (baij->cstart+aj[j])*bs; 1155 for (k=0; k<bs; k++) { 1156 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157 col++; a += bs; 1158 } 1159 } 1160 } 1161 /* copy over the B part */ 1162 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1163 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1164 for (i=0; i<mbs; i++) { 1165 rvals[0] = bs*(baij->rstart + i); 1166 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1167 for (j=ai[i]; j<ai[i+1]; j++) { 1168 col = baij->garray[aj[j]]*bs; 1169 for (k=0; k<bs; k++) { 1170 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1171 col++; a += bs; 1172 } 1173 } 1174 } 1175 ierr = PetscFree(rvals);CHKERRQ(ierr); 1176 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1177 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1178 /* 1179 Everyone has to call to draw the matrix since the graphics waits are 1180 synchronized across all processors that share the PetscDraw object 1181 */ 1182 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183 if (!rank) { 1184 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1185 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1188 ierr = MatDestroy(A);CHKERRQ(ierr); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatView_MPIBAIJ" 1195 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1196 { 1197 int ierr; 1198 PetscTruth isascii,isdraw,issocket,isbinary; 1199 1200 PetscFunctionBegin; 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1202 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1203 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1204 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1205 if (isascii || isdraw || issocket || isbinary) { 1206 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1207 } else { 1208 SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1215 int MatDestroy_MPIBAIJ(Mat mat) 1216 { 1217 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1218 int ierr; 1219 1220 PetscFunctionBegin; 1221 #if defined(PETSC_USE_LOG) 1222 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 1223 #endif 1224 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1225 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1226 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1227 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1228 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1229 #if defined (PETSC_USE_CTABLE) 1230 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1231 #else 1232 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1233 #endif 1234 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1235 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1236 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1237 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1238 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1239 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1240 #if defined(PETSC_USE_MAT_SINGLE) 1241 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1242 #endif 1243 ierr = PetscFree(baij);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatMult_MPIBAIJ" 1249 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1250 { 1251 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1252 int ierr,nt; 1253 1254 PetscFunctionBegin; 1255 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1256 if (nt != A->n) { 1257 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1258 } 1259 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1260 if (nt != A->m) { 1261 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1262 } 1263 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1264 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1266 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1267 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1273 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1274 { 1275 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1276 int ierr; 1277 1278 PetscFunctionBegin; 1279 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1280 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1282 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1288 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1289 { 1290 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1291 int ierr; 1292 1293 PetscFunctionBegin; 1294 /* do nondiagonal part */ 1295 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1296 /* send it on its way */ 1297 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1298 /* do local part */ 1299 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1300 /* receive remote parts: note this assumes the values are not actually */ 1301 /* inserted in yy until the next line, which is true for my implementation*/ 1302 /* but is not perhaps always true. */ 1303 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1309 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1310 { 1311 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1312 int ierr; 1313 1314 PetscFunctionBegin; 1315 /* do nondiagonal part */ 1316 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1317 /* send it on its way */ 1318 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1319 /* do local part */ 1320 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1321 /* receive remote parts: note this assumes the values are not actually */ 1322 /* inserted in yy until the next line, which is true for my implementation*/ 1323 /* but is not perhaps always true. */ 1324 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 /* 1329 This only works correctly for square matrices where the subblock A->A is the 1330 diagonal block 1331 */ 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1334 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1335 { 1336 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1337 int ierr; 1338 1339 PetscFunctionBegin; 1340 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1341 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatScale_MPIBAIJ" 1347 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1348 { 1349 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1350 int ierr; 1351 1352 PetscFunctionBegin; 1353 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1354 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1360 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1361 { 1362 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1363 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1364 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1365 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1366 int *cmap,*idx_p,cstart = mat->cstart; 1367 1368 PetscFunctionBegin; 1369 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1370 mat->getrowactive = PETSC_TRUE; 1371 1372 if (!mat->rowvalues && (idx || v)) { 1373 /* 1374 allocate enough space to hold information from the longest row. 1375 */ 1376 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1377 int max = 1,mbs = mat->mbs,tmp; 1378 for (i=0; i<mbs; i++) { 1379 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1380 if (max < tmp) { max = tmp; } 1381 } 1382 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1383 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1384 } 1385 1386 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1387 lrow = row - brstart; 1388 1389 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1390 if (!v) {pvA = 0; pvB = 0;} 1391 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1392 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1393 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1394 nztot = nzA + nzB; 1395 1396 cmap = mat->garray; 1397 if (v || idx) { 1398 if (nztot) { 1399 /* Sort by increasing column numbers, assuming A and B already sorted */ 1400 int imark = -1; 1401 if (v) { 1402 *v = v_p = mat->rowvalues; 1403 for (i=0; i<nzB; i++) { 1404 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1405 else break; 1406 } 1407 imark = i; 1408 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1409 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1410 } 1411 if (idx) { 1412 *idx = idx_p = mat->rowindices; 1413 if (imark > -1) { 1414 for (i=0; i<imark; i++) { 1415 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1416 } 1417 } else { 1418 for (i=0; i<nzB; i++) { 1419 if (cmap[cworkB[i]/bs] < cstart) 1420 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1421 else break; 1422 } 1423 imark = i; 1424 } 1425 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1426 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1427 } 1428 } else { 1429 if (idx) *idx = 0; 1430 if (v) *v = 0; 1431 } 1432 } 1433 *nz = nztot; 1434 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1435 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1441 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1442 { 1443 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1444 1445 PetscFunctionBegin; 1446 if (baij->getrowactive == PETSC_FALSE) { 1447 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1448 } 1449 baij->getrowactive = PETSC_FALSE; 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #undef __FUNCT__ 1454 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1455 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1456 { 1457 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1458 1459 PetscFunctionBegin; 1460 *bs = baij->bs; 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1466 int MatZeroEntries_MPIBAIJ(Mat A) 1467 { 1468 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1469 int ierr; 1470 1471 PetscFunctionBegin; 1472 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1473 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1479 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1480 { 1481 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1482 Mat A = a->A,B = a->B; 1483 int ierr; 1484 PetscReal isend[5],irecv[5]; 1485 1486 PetscFunctionBegin; 1487 info->block_size = (PetscReal)a->bs; 1488 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1489 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1490 isend[3] = info->memory; isend[4] = info->mallocs; 1491 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1492 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1493 isend[3] += info->memory; isend[4] += info->mallocs; 1494 if (flag == MAT_LOCAL) { 1495 info->nz_used = isend[0]; 1496 info->nz_allocated = isend[1]; 1497 info->nz_unneeded = isend[2]; 1498 info->memory = isend[3]; 1499 info->mallocs = isend[4]; 1500 } else if (flag == MAT_GLOBAL_MAX) { 1501 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1502 info->nz_used = irecv[0]; 1503 info->nz_allocated = irecv[1]; 1504 info->nz_unneeded = irecv[2]; 1505 info->memory = irecv[3]; 1506 info->mallocs = irecv[4]; 1507 } else if (flag == MAT_GLOBAL_SUM) { 1508 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1509 info->nz_used = irecv[0]; 1510 info->nz_allocated = irecv[1]; 1511 info->nz_unneeded = irecv[2]; 1512 info->memory = irecv[3]; 1513 info->mallocs = irecv[4]; 1514 } else { 1515 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1516 } 1517 info->rows_global = (PetscReal)A->M; 1518 info->columns_global = (PetscReal)A->N; 1519 info->rows_local = (PetscReal)A->m; 1520 info->columns_local = (PetscReal)A->N; 1521 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1522 info->fill_ratio_needed = 0; 1523 info->factor_mallocs = 0; 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1529 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1530 { 1531 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1532 int ierr; 1533 1534 PetscFunctionBegin; 1535 switch (op) { 1536 case MAT_NO_NEW_NONZERO_LOCATIONS: 1537 case MAT_YES_NEW_NONZERO_LOCATIONS: 1538 case MAT_COLUMNS_UNSORTED: 1539 case MAT_COLUMNS_SORTED: 1540 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1541 case MAT_KEEP_ZEROED_ROWS: 1542 case MAT_NEW_NONZERO_LOCATION_ERR: 1543 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1544 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1545 break; 1546 case MAT_ROW_ORIENTED: 1547 a->roworiented = PETSC_TRUE; 1548 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1549 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1550 break; 1551 case MAT_ROWS_SORTED: 1552 case MAT_ROWS_UNSORTED: 1553 case MAT_YES_NEW_DIAGONALS: 1554 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1555 break; 1556 case MAT_COLUMN_ORIENTED: 1557 a->roworiented = PETSC_FALSE; 1558 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1559 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1560 break; 1561 case MAT_IGNORE_OFF_PROC_ENTRIES: 1562 a->donotstash = PETSC_TRUE; 1563 break; 1564 case MAT_NO_NEW_DIAGONALS: 1565 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1566 case MAT_USE_HASH_TABLE: 1567 a->ht_flag = PETSC_TRUE; 1568 break; 1569 default: 1570 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1571 } 1572 PetscFunctionReturn(0); 1573 } 1574 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1577 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1578 { 1579 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1580 Mat_SeqBAIJ *Aloc; 1581 Mat B; 1582 int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1583 int bs=baij->bs,mbs=baij->mbs; 1584 MatScalar *a; 1585 1586 PetscFunctionBegin; 1587 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1588 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1589 1590 /* copy over the A part */ 1591 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1592 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1593 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1594 1595 for (i=0; i<mbs; i++) { 1596 rvals[0] = bs*(baij->rstart + i); 1597 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1598 for (j=ai[i]; j<ai[i+1]; j++) { 1599 col = (baij->cstart+aj[j])*bs; 1600 for (k=0; k<bs; k++) { 1601 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1602 col++; a += bs; 1603 } 1604 } 1605 } 1606 /* copy over the B part */ 1607 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1608 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1609 for (i=0; i<mbs; i++) { 1610 rvals[0] = bs*(baij->rstart + i); 1611 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1612 for (j=ai[i]; j<ai[i+1]; j++) { 1613 col = baij->garray[aj[j]]*bs; 1614 for (k=0; k<bs; k++) { 1615 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1616 col++; a += bs; 1617 } 1618 } 1619 } 1620 ierr = PetscFree(rvals);CHKERRQ(ierr); 1621 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1622 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1623 1624 if (matout) { 1625 *matout = B; 1626 } else { 1627 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1634 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1635 { 1636 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1637 Mat a = baij->A,b = baij->B; 1638 int ierr,s1,s2,s3; 1639 1640 PetscFunctionBegin; 1641 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1642 if (rr) { 1643 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1644 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1645 /* Overlap communication with computation. */ 1646 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1647 } 1648 if (ll) { 1649 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1650 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1651 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1652 } 1653 /* scale the diagonal block */ 1654 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1655 1656 if (rr) { 1657 /* Do a scatter end and then right scale the off-diagonal block */ 1658 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1659 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1660 } 1661 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1667 int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1668 { 1669 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1670 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1671 int *nprocs,j,idx,nsends,row; 1672 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1673 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1674 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1675 MPI_Comm comm = A->comm; 1676 MPI_Request *send_waits,*recv_waits; 1677 MPI_Status recv_status,*send_status; 1678 IS istmp; 1679 PetscTruth found; 1680 1681 PetscFunctionBegin; 1682 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1683 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1684 1685 /* first count number of contributors to each processor */ 1686 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1687 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1688 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1689 for (i=0; i<N; i++) { 1690 idx = rows[i]; 1691 found = PETSC_FALSE; 1692 for (j=0; j<size; j++) { 1693 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1694 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1695 } 1696 } 1697 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1698 } 1699 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1700 1701 /* inform other processors of number of messages and max length*/ 1702 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1703 1704 /* post receives: */ 1705 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1706 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1707 for (i=0; i<nrecvs; i++) { 1708 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1709 } 1710 1711 /* do sends: 1712 1) starts[i] gives the starting index in svalues for stuff going to 1713 the ith processor 1714 */ 1715 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1716 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1717 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1718 starts[0] = 0; 1719 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1720 for (i=0; i<N; i++) { 1721 svalues[starts[owner[i]]++] = rows[i]; 1722 } 1723 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1724 1725 starts[0] = 0; 1726 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1727 count = 0; 1728 for (i=0; i<size; i++) { 1729 if (nprocs[2*i+1]) { 1730 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1731 } 1732 } 1733 ierr = PetscFree(starts);CHKERRQ(ierr); 1734 1735 base = owners[rank]*bs; 1736 1737 /* wait on receives */ 1738 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1739 source = lens + nrecvs; 1740 count = nrecvs; slen = 0; 1741 while (count) { 1742 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1743 /* unpack receives into our local space */ 1744 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1745 source[imdex] = recv_status.MPI_SOURCE; 1746 lens[imdex] = n; 1747 slen += n; 1748 count--; 1749 } 1750 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1751 1752 /* move the data into the send scatter */ 1753 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1754 count = 0; 1755 for (i=0; i<nrecvs; i++) { 1756 values = rvalues + i*nmax; 1757 for (j=0; j<lens[i]; j++) { 1758 lrows[count++] = values[j] - base; 1759 } 1760 } 1761 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1762 ierr = PetscFree(lens);CHKERRQ(ierr); 1763 ierr = PetscFree(owner);CHKERRQ(ierr); 1764 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1765 1766 /* actually zap the local rows */ 1767 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1768 PetscLogObjectParent(A,istmp); 1769 1770 /* 1771 Zero the required rows. If the "diagonal block" of the matrix 1772 is square and the user wishes to set the diagonal we use seperate 1773 code so that MatSetValues() is not called for each diagonal allocating 1774 new memory, thus calling lots of mallocs and slowing things down. 1775 1776 Contributed by: Mathew Knepley 1777 */ 1778 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1779 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1780 if (diag && (l->A->M == l->A->N)) { 1781 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1782 } else if (diag) { 1783 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1784 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1785 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1786 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1787 } 1788 for (i=0; i<slen; i++) { 1789 row = lrows[i] + rstart_bs; 1790 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1791 } 1792 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1793 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 } else { 1795 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1796 } 1797 1798 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1799 ierr = PetscFree(lrows);CHKERRQ(ierr); 1800 1801 /* wait on sends */ 1802 if (nsends) { 1803 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1804 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1805 ierr = PetscFree(send_status);CHKERRQ(ierr); 1806 } 1807 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1808 ierr = PetscFree(svalues);CHKERRQ(ierr); 1809 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1815 int MatPrintHelp_MPIBAIJ(Mat A) 1816 { 1817 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1818 MPI_Comm comm = A->comm; 1819 static int called = 0; 1820 int ierr; 1821 1822 PetscFunctionBegin; 1823 if (!a->rank) { 1824 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1825 } 1826 if (called) {PetscFunctionReturn(0);} else called = 1; 1827 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1828 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1829 PetscFunctionReturn(0); 1830 } 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1834 int MatSetUnfactored_MPIBAIJ(Mat A) 1835 { 1836 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1837 int ierr; 1838 1839 PetscFunctionBegin; 1840 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatEqual_MPIBAIJ" 1848 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1849 { 1850 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1851 Mat a,b,c,d; 1852 PetscTruth flg; 1853 int ierr; 1854 1855 PetscFunctionBegin; 1856 a = matA->A; b = matA->B; 1857 c = matB->A; d = matB->B; 1858 1859 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1860 if (flg == PETSC_TRUE) { 1861 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1862 } 1863 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1864 PetscFunctionReturn(0); 1865 } 1866 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1870 int MatSetUpPreallocation_MPIBAIJ(Mat A) 1871 { 1872 int ierr; 1873 1874 PetscFunctionBegin; 1875 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 /* -------------------------------------------------------------------*/ 1880 static struct _MatOps MatOps_Values = { 1881 MatSetValues_MPIBAIJ, 1882 MatGetRow_MPIBAIJ, 1883 MatRestoreRow_MPIBAIJ, 1884 MatMult_MPIBAIJ, 1885 MatMultAdd_MPIBAIJ, 1886 MatMultTranspose_MPIBAIJ, 1887 MatMultTransposeAdd_MPIBAIJ, 1888 0, 1889 0, 1890 0, 1891 0, 1892 0, 1893 0, 1894 0, 1895 MatTranspose_MPIBAIJ, 1896 MatGetInfo_MPIBAIJ, 1897 MatEqual_MPIBAIJ, 1898 MatGetDiagonal_MPIBAIJ, 1899 MatDiagonalScale_MPIBAIJ, 1900 MatNorm_MPIBAIJ, 1901 MatAssemblyBegin_MPIBAIJ, 1902 MatAssemblyEnd_MPIBAIJ, 1903 0, 1904 MatSetOption_MPIBAIJ, 1905 MatZeroEntries_MPIBAIJ, 1906 MatZeroRows_MPIBAIJ, 1907 0, 1908 0, 1909 0, 1910 0, 1911 MatSetUpPreallocation_MPIBAIJ, 1912 0, 1913 0, 1914 0, 1915 0, 1916 MatDuplicate_MPIBAIJ, 1917 0, 1918 0, 1919 0, 1920 0, 1921 0, 1922 MatGetSubMatrices_MPIBAIJ, 1923 MatIncreaseOverlap_MPIBAIJ, 1924 MatGetValues_MPIBAIJ, 1925 0, 1926 MatPrintHelp_MPIBAIJ, 1927 MatScale_MPIBAIJ, 1928 0, 1929 0, 1930 0, 1931 MatGetBlockSize_MPIBAIJ, 1932 0, 1933 0, 1934 0, 1935 0, 1936 0, 1937 0, 1938 MatSetUnfactored_MPIBAIJ, 1939 0, 1940 MatSetValuesBlocked_MPIBAIJ, 1941 0, 1942 MatDestroy_MPIBAIJ, 1943 MatView_MPIBAIJ, 1944 MatGetPetscMaps_Petsc, 1945 0, 1946 0, 1947 0, 1948 0, 1949 0, 1950 0, 1951 MatGetRowMax_MPIBAIJ}; 1952 1953 1954 EXTERN_C_BEGIN 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1957 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1958 { 1959 PetscFunctionBegin; 1960 *a = ((Mat_MPIBAIJ *)A->data)->A; 1961 *iscopy = PETSC_FALSE; 1962 PetscFunctionReturn(0); 1963 } 1964 EXTERN_C_END 1965 1966 EXTERN_C_BEGIN 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 1969 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1970 { 1971 Mat_MPIBAIJ *b; 1972 int ierr,i; 1973 1974 PetscFunctionBegin; 1975 B->preallocated = PETSC_TRUE; 1976 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1977 1978 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1979 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1980 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1981 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1982 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1983 if (d_nnz) { 1984 for (i=0; i<B->m/bs; i++) { 1985 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1986 } 1987 } 1988 if (o_nnz) { 1989 for (i=0; i<B->m/bs; i++) { 1990 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1991 } 1992 } 1993 1994 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1995 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1996 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1997 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1998 1999 b = (Mat_MPIBAIJ*)B->data; 2000 b->bs = bs; 2001 b->bs2 = bs*bs; 2002 b->mbs = B->m/bs; 2003 b->nbs = B->n/bs; 2004 b->Mbs = B->M/bs; 2005 b->Nbs = B->N/bs; 2006 2007 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2008 b->rowners[0] = 0; 2009 for (i=2; i<=b->size; i++) { 2010 b->rowners[i] += b->rowners[i-1]; 2011 } 2012 b->rstart = b->rowners[b->rank]; 2013 b->rend = b->rowners[b->rank+1]; 2014 2015 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2016 b->cowners[0] = 0; 2017 for (i=2; i<=b->size; i++) { 2018 b->cowners[i] += b->cowners[i-1]; 2019 } 2020 b->cstart = b->cowners[b->rank]; 2021 b->cend = b->cowners[b->rank+1]; 2022 2023 for (i=0; i<=b->size; i++) { 2024 b->rowners_bs[i] = b->rowners[i]*bs; 2025 } 2026 b->rstart_bs = b->rstart*bs; 2027 b->rend_bs = b->rend*bs; 2028 b->cstart_bs = b->cstart*bs; 2029 b->cend_bs = b->cend*bs; 2030 2031 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2032 PetscLogObjectParent(B,b->A); 2033 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2034 PetscLogObjectParent(B,b->B); 2035 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2036 2037 PetscFunctionReturn(0); 2038 } 2039 EXTERN_C_END 2040 2041 EXTERN_C_BEGIN 2042 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2043 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2044 EXTERN_C_END 2045 2046 EXTERN_C_BEGIN 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatCreate_MPIBAIJ" 2049 int MatCreate_MPIBAIJ(Mat B) 2050 { 2051 Mat_MPIBAIJ *b; 2052 int ierr; 2053 PetscTruth flg; 2054 2055 PetscFunctionBegin; 2056 2057 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2058 B->data = (void*)b; 2059 2060 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2061 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2062 B->mapping = 0; 2063 B->factor = 0; 2064 B->assembled = PETSC_FALSE; 2065 2066 B->insertmode = NOT_SET_VALUES; 2067 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2068 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2069 2070 /* build local table of row and column ownerships */ 2071 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2072 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2073 b->cowners = b->rowners + b->size + 2; 2074 b->rowners_bs = b->cowners + b->size + 2; 2075 2076 /* build cache for off array entries formed */ 2077 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2078 b->donotstash = PETSC_FALSE; 2079 b->colmap = PETSC_NULL; 2080 b->garray = PETSC_NULL; 2081 b->roworiented = PETSC_TRUE; 2082 2083 #if defined(PETSC_USE_MAT_SINGLE) 2084 /* stuff for MatSetValues_XXX in single precision */ 2085 b->setvalueslen = 0; 2086 b->setvaluescopy = PETSC_NULL; 2087 #endif 2088 2089 /* stuff used in block assembly */ 2090 b->barray = 0; 2091 2092 /* stuff used for matrix vector multiply */ 2093 b->lvec = 0; 2094 b->Mvctx = 0; 2095 2096 /* stuff for MatGetRow() */ 2097 b->rowindices = 0; 2098 b->rowvalues = 0; 2099 b->getrowactive = PETSC_FALSE; 2100 2101 /* hash table stuff */ 2102 b->ht = 0; 2103 b->hd = 0; 2104 b->ht_size = 0; 2105 b->ht_flag = PETSC_FALSE; 2106 b->ht_fact = 0; 2107 b->ht_total_ct = 0; 2108 b->ht_insert_ct = 0; 2109 2110 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2111 if (flg) { 2112 PetscReal fact = 1.39; 2113 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2114 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2115 if (fact <= 1.0) fact = 1.39; 2116 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2117 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2118 } 2119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2120 "MatStoreValues_MPIBAIJ", 2121 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2123 "MatRetrieveValues_MPIBAIJ", 2124 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2126 "MatGetDiagonalBlock_MPIBAIJ", 2127 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2129 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2130 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2132 "MatDiagonalScaleLocal_MPIBAIJ", 2133 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2135 "MatSetHashTableFactor_MPIBAIJ", 2136 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 EXTERN_C_END 2140 2141 #undef __FUNCT__ 2142 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2143 /*@C 2144 MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2145 (block compressed row). For good matrix assembly performance 2146 the user should preallocate the matrix storage by setting the parameters 2147 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2148 performance can be increased by more than a factor of 50. 2149 2150 Collective on Mat 2151 2152 Input Parameters: 2153 + A - the matrix 2154 . bs - size of blockk 2155 . d_nz - number of block nonzeros per block row in diagonal portion of local 2156 submatrix (same for all local rows) 2157 . d_nnz - array containing the number of block nonzeros in the various block rows 2158 of the in diagonal portion of the local (possibly different for each block 2159 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2160 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2161 submatrix (same for all local rows). 2162 - o_nnz - array containing the number of nonzeros in the various block rows of the 2163 off-diagonal portion of the local submatrix (possibly different for 2164 each block row) or PETSC_NULL. 2165 2166 Output Parameter: 2167 2168 2169 Options Database Keys: 2170 . -mat_no_unroll - uses code that does not unroll the loops in the 2171 block calculations (much slower) 2172 . -mat_block_size - size of the blocks to use 2173 2174 Notes: 2175 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2176 than it must be used on all processors that share the object for that argument. 2177 2178 Storage Information: 2179 For a square global matrix we define each processor's diagonal portion 2180 to be its local rows and the corresponding columns (a square submatrix); 2181 each processor's off-diagonal portion encompasses the remainder of the 2182 local matrix (a rectangular submatrix). 2183 2184 The user can specify preallocated storage for the diagonal part of 2185 the local submatrix with either d_nz or d_nnz (not both). Set 2186 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2187 memory allocation. Likewise, specify preallocated storage for the 2188 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2189 2190 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2191 the figure below we depict these three local rows and all columns (0-11). 2192 2193 .vb 2194 0 1 2 3 4 5 6 7 8 9 10 11 2195 ------------------- 2196 row 3 | o o o d d d o o o o o o 2197 row 4 | o o o d d d o o o o o o 2198 row 5 | o o o d d d o o o o o o 2199 ------------------- 2200 .ve 2201 2202 Thus, any entries in the d locations are stored in the d (diagonal) 2203 submatrix, and any entries in the o locations are stored in the 2204 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2205 stored simply in the MATSEQBAIJ format for compressed row storage. 2206 2207 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2208 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2209 In general, for PDE problems in which most nonzeros are near the diagonal, 2210 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2211 or you will get TERRIBLE performance; see the users' manual chapter on 2212 matrices. 2213 2214 Level: intermediate 2215 2216 .keywords: matrix, block, aij, compressed row, sparse, parallel 2217 2218 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2219 @*/ 2220 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2221 { 2222 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2223 2224 PetscFunctionBegin; 2225 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2226 if (f) { 2227 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatCreateMPIBAIJ" 2234 /*@C 2235 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2236 (block compressed row). For good matrix assembly performance 2237 the user should preallocate the matrix storage by setting the parameters 2238 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2239 performance can be increased by more than a factor of 50. 2240 2241 Collective on MPI_Comm 2242 2243 Input Parameters: 2244 + comm - MPI communicator 2245 . bs - size of blockk 2246 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2247 This value should be the same as the local size used in creating the 2248 y vector for the matrix-vector product y = Ax. 2249 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2250 This value should be the same as the local size used in creating the 2251 x vector for the matrix-vector product y = Ax. 2252 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2253 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2254 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2255 submatrix (same for all local rows) 2256 . d_nnz - array containing the number of nonzero blocks in the various block rows 2257 of the in diagonal portion of the local (possibly different for each block 2258 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2259 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2260 submatrix (same for all local rows). 2261 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2262 off-diagonal portion of the local submatrix (possibly different for 2263 each block row) or PETSC_NULL. 2264 2265 Output Parameter: 2266 . A - the matrix 2267 2268 Options Database Keys: 2269 . -mat_no_unroll - uses code that does not unroll the loops in the 2270 block calculations (much slower) 2271 . -mat_block_size - size of the blocks to use 2272 2273 Notes: 2274 A nonzero block is any block that as 1 or more nonzeros in it 2275 2276 The user MUST specify either the local or global matrix dimensions 2277 (possibly both). 2278 2279 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2280 than it must be used on all processors that share the object for that argument. 2281 2282 Storage Information: 2283 For a square global matrix we define each processor's diagonal portion 2284 to be its local rows and the corresponding columns (a square submatrix); 2285 each processor's off-diagonal portion encompasses the remainder of the 2286 local matrix (a rectangular submatrix). 2287 2288 The user can specify preallocated storage for the diagonal part of 2289 the local submatrix with either d_nz or d_nnz (not both). Set 2290 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2291 memory allocation. Likewise, specify preallocated storage for the 2292 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2293 2294 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2295 the figure below we depict these three local rows and all columns (0-11). 2296 2297 .vb 2298 0 1 2 3 4 5 6 7 8 9 10 11 2299 ------------------- 2300 row 3 | o o o d d d o o o o o o 2301 row 4 | o o o d d d o o o o o o 2302 row 5 | o o o d d d o o o o o o 2303 ------------------- 2304 .ve 2305 2306 Thus, any entries in the d locations are stored in the d (diagonal) 2307 submatrix, and any entries in the o locations are stored in the 2308 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2309 stored simply in the MATSEQBAIJ format for compressed row storage. 2310 2311 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2312 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2313 In general, for PDE problems in which most nonzeros are near the diagonal, 2314 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2315 or you will get TERRIBLE performance; see the users' manual chapter on 2316 matrices. 2317 2318 Level: intermediate 2319 2320 .keywords: matrix, block, aij, compressed row, sparse, parallel 2321 2322 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2323 @*/ 2324 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2325 { 2326 int ierr,size; 2327 2328 PetscFunctionBegin; 2329 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2330 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2331 if (size > 1) { 2332 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2333 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2334 } else { 2335 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2336 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2337 } 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2343 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2344 { 2345 Mat mat; 2346 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2347 int ierr,len=0; 2348 2349 PetscFunctionBegin; 2350 *newmat = 0; 2351 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2352 ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr); 2353 mat->preallocated = PETSC_TRUE; 2354 mat->assembled = PETSC_TRUE; 2355 a = (Mat_MPIBAIJ*)mat->data; 2356 a->bs = oldmat->bs; 2357 a->bs2 = oldmat->bs2; 2358 a->mbs = oldmat->mbs; 2359 a->nbs = oldmat->nbs; 2360 a->Mbs = oldmat->Mbs; 2361 a->Nbs = oldmat->Nbs; 2362 2363 a->rstart = oldmat->rstart; 2364 a->rend = oldmat->rend; 2365 a->cstart = oldmat->cstart; 2366 a->cend = oldmat->cend; 2367 a->size = oldmat->size; 2368 a->rank = oldmat->rank; 2369 a->donotstash = oldmat->donotstash; 2370 a->roworiented = oldmat->roworiented; 2371 a->rowindices = 0; 2372 a->rowvalues = 0; 2373 a->getrowactive = PETSC_FALSE; 2374 a->barray = 0; 2375 a->rstart_bs = oldmat->rstart_bs; 2376 a->rend_bs = oldmat->rend_bs; 2377 a->cstart_bs = oldmat->cstart_bs; 2378 a->cend_bs = oldmat->cend_bs; 2379 2380 /* hash table stuff */ 2381 a->ht = 0; 2382 a->hd = 0; 2383 a->ht_size = 0; 2384 a->ht_flag = oldmat->ht_flag; 2385 a->ht_fact = oldmat->ht_fact; 2386 a->ht_total_ct = 0; 2387 a->ht_insert_ct = 0; 2388 2389 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2390 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2391 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2392 if (oldmat->colmap) { 2393 #if defined (PETSC_USE_CTABLE) 2394 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2395 #else 2396 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2397 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2398 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2399 #endif 2400 } else a->colmap = 0; 2401 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2402 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2403 PetscLogObjectMemory(mat,len*sizeof(int)); 2404 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2405 } else a->garray = 0; 2406 2407 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2408 PetscLogObjectParent(mat,a->lvec); 2409 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2410 2411 PetscLogObjectParent(mat,a->Mvctx); 2412 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2413 PetscLogObjectParent(mat,a->A); 2414 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2415 PetscLogObjectParent(mat,a->B); 2416 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2417 *newmat = mat; 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #include "petscsys.h" 2422 2423 EXTERN_C_BEGIN 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatLoad_MPIBAIJ" 2426 int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2427 { 2428 Mat A; 2429 int i,nz,ierr,j,rstart,rend,fd; 2430 PetscScalar *vals,*buf; 2431 MPI_Comm comm = ((PetscObject)viewer)->comm; 2432 MPI_Status status; 2433 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2434 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2435 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2436 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2437 int dcount,kmax,k,nzcount,tmp; 2438 #if defined(PETSC_HAVE_DSCPACK) 2439 PetscTruth flag; 2440 #endif 2441 2442 PetscFunctionBegin; 2443 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2444 2445 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2446 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2447 if (!rank) { 2448 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2449 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2450 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2451 if (header[3] < 0) { 2452 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2453 } 2454 } 2455 2456 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2457 M = header[1]; N = header[2]; 2458 2459 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2460 2461 /* 2462 This code adds extra rows to make sure the number of rows is 2463 divisible by the blocksize 2464 */ 2465 Mbs = M/bs; 2466 extra_rows = bs - M + bs*(Mbs); 2467 if (extra_rows == bs) extra_rows = 0; 2468 else Mbs++; 2469 if (extra_rows &&!rank) { 2470 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2471 } 2472 2473 /* determine ownership of all rows */ 2474 mbs = Mbs/size + ((Mbs % size) > rank); 2475 m = mbs*bs; 2476 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2477 browners = rowners + size + 1; 2478 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2479 rowners[0] = 0; 2480 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2481 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2482 rstart = rowners[rank]; 2483 rend = rowners[rank+1]; 2484 2485 /* distribute row lengths to all processors */ 2486 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2487 if (!rank) { 2488 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2489 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2490 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2491 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2492 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2493 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2494 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2495 } else { 2496 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2497 } 2498 2499 if (!rank) { 2500 /* calculate the number of nonzeros on each processor */ 2501 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2502 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2503 for (i=0; i<size; i++) { 2504 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2505 procsnz[i] += rowlengths[j]; 2506 } 2507 } 2508 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2509 2510 /* determine max buffer needed and allocate it */ 2511 maxnz = 0; 2512 for (i=0; i<size; i++) { 2513 maxnz = PetscMax(maxnz,procsnz[i]); 2514 } 2515 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2516 2517 /* read in my part of the matrix column indices */ 2518 nz = procsnz[0]; 2519 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2520 mycols = ibuf; 2521 if (size == 1) nz -= extra_rows; 2522 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2523 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2524 2525 /* read in every ones (except the last) and ship off */ 2526 for (i=1; i<size-1; i++) { 2527 nz = procsnz[i]; 2528 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2529 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2530 } 2531 /* read in the stuff for the last proc */ 2532 if (size != 1) { 2533 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2534 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2535 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2536 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2537 } 2538 ierr = PetscFree(cols);CHKERRQ(ierr); 2539 } else { 2540 /* determine buffer space needed for message */ 2541 nz = 0; 2542 for (i=0; i<m; i++) { 2543 nz += locrowlens[i]; 2544 } 2545 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2546 mycols = ibuf; 2547 /* receive message of column indices*/ 2548 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2549 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2550 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2551 } 2552 2553 /* loop over local rows, determining number of off diagonal entries */ 2554 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2555 odlens = dlens + (rend-rstart); 2556 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2557 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2558 masked1 = mask + Mbs; 2559 masked2 = masked1 + Mbs; 2560 rowcount = 0; nzcount = 0; 2561 for (i=0; i<mbs; i++) { 2562 dcount = 0; 2563 odcount = 0; 2564 for (j=0; j<bs; j++) { 2565 kmax = locrowlens[rowcount]; 2566 for (k=0; k<kmax; k++) { 2567 tmp = mycols[nzcount++]/bs; 2568 if (!mask[tmp]) { 2569 mask[tmp] = 1; 2570 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2571 else masked1[dcount++] = tmp; 2572 } 2573 } 2574 rowcount++; 2575 } 2576 2577 dlens[i] = dcount; 2578 odlens[i] = odcount; 2579 2580 /* zero out the mask elements we set */ 2581 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2582 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2583 } 2584 2585 /* create our matrix */ 2586 ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2587 A = *newmat; 2588 MatSetOption(A,MAT_COLUMNS_SORTED); 2589 2590 if (!rank) { 2591 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2592 /* read in my part of the matrix numerical values */ 2593 nz = procsnz[0]; 2594 vals = buf; 2595 mycols = ibuf; 2596 if (size == 1) nz -= extra_rows; 2597 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2598 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2599 2600 /* insert into matrix */ 2601 jj = rstart*bs; 2602 for (i=0; i<m; i++) { 2603 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2604 mycols += locrowlens[i]; 2605 vals += locrowlens[i]; 2606 jj++; 2607 } 2608 /* read in other processors (except the last one) and ship out */ 2609 for (i=1; i<size-1; i++) { 2610 nz = procsnz[i]; 2611 vals = buf; 2612 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2613 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2614 } 2615 /* the last proc */ 2616 if (size != 1){ 2617 nz = procsnz[i] - extra_rows; 2618 vals = buf; 2619 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2620 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2621 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2622 } 2623 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2624 } else { 2625 /* receive numeric values */ 2626 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2627 2628 /* receive message of values*/ 2629 vals = buf; 2630 mycols = ibuf; 2631 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2632 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2633 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2634 2635 /* insert into matrix */ 2636 jj = rstart*bs; 2637 for (i=0; i<m; i++) { 2638 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2639 mycols += locrowlens[i]; 2640 vals += locrowlens[i]; 2641 jj++; 2642 } 2643 } 2644 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2645 ierr = PetscFree(buf);CHKERRQ(ierr); 2646 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2647 ierr = PetscFree(rowners);CHKERRQ(ierr); 2648 ierr = PetscFree(dlens);CHKERRQ(ierr); 2649 ierr = PetscFree(mask);CHKERRQ(ierr); 2650 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2652 #if defined(PETSC_HAVE_DSCPACK) 2653 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 2654 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 2655 #endif 2656 PetscFunctionReturn(0); 2657 } 2658 EXTERN_C_END 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2662 /*@ 2663 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2664 2665 Input Parameters: 2666 . mat - the matrix 2667 . fact - factor 2668 2669 Collective on Mat 2670 2671 Level: advanced 2672 2673 Notes: 2674 This can also be set by the command line option: -mat_use_hash_table fact 2675 2676 .keywords: matrix, hashtable, factor, HT 2677 2678 .seealso: MatSetOption() 2679 @*/ 2680 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2681 { 2682 int ierr,(*f)(Mat,PetscReal); 2683 2684 PetscFunctionBegin; 2685 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2686 if (f) { 2687 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2688 } 2689 PetscFunctionReturn(0); 2690 } 2691 2692 #undef __FUNCT__ 2693 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2694 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2695 { 2696 Mat_MPIBAIJ *baij; 2697 2698 PetscFunctionBegin; 2699 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2700 baij = (Mat_MPIBAIJ*)mat->data; 2701 baij->ht_fact = fact; 2702 PetscFunctionReturn(0); 2703 } 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2707 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2708 { 2709 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2710 PetscFunctionBegin; 2711 *Ad = a->A; 2712 *Ao = a->B; 2713 *colmap = a->garray; 2714 PetscFunctionReturn(0); 2715 } 2716