/*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "src/vec/vecimpl.h"

EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN int DisAssemble_MPIBAIJ(Mat);
EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqBAIJ(Mat);
EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
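/*
   A minimal sketch (not code from this file) of the conversion described
   above, assuming PETSC_USE_MAT_SINGLE is defined so that MatScalar is
   single precision while PetscScalar is double precision; the hypothetical
   buffer vsingle stands in for b->setvaluescopy used by the real wrappers
   further below:

       for (i=0; i<N; i++) vsingle[i] = (MatScalar)v[i];
       ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
*/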
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,i;
  PetscScalar *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
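/*
   Sketch of how the map built above is consulted elsewhere in this file
   (see MatSetValues_MPIBAIJ_MatScalar() and MatGetValues_MPIBAIJ()): the
   stored value is the local offset plus one, so that zero marks an absent
   block column. Here gbcol is a hypothetical global block column index.

   #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gbcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
   #else
       lcol = baij->colmap[gbcol] - 1;
   #endif
       if (lcol < 0) { ... gbcol not (yet) present in the off-diagonal part ... }
*/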
#define CHUNKSIZE  10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}
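/*
   How the macro above (and its B variant just below) inserts an entry: the
   target block row is scanned for block column bcol with a short bisection
   followed by a linear sweep; an existing block is updated in place, an
   absent one is inserted by shifting the later blocks up, and when the row
   is full the entire storage is reallocated with room for CHUNKSIZE extra
   blocks spliced in at this row.
*/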
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
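/*
   Insertion protocol used above and by the blocked variant below: entries
   whose row is owned locally go directly into the diagonal block baij->A
   or, after translating the global column through colmap, into the
   off-diagonal block baij->B; entries for rows owned by other processes
   are queued in mat->stash and exchanged during
   MatAssemblyBegin()/MatAssemblyEnd().
*/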
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
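/*
   The HASH macro implements multiplicative (Fibonacci) hashing: multiply
   the key by the golden-ratio constant and scale the fractional part by
   the table size. A worked example with size=1000 and key=7:

       tmp          = 7*0.6180339887 = 4.3262...
       HASH(1000,7) = (int)(1000*(4.3262... - 4)) = 326
*/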
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
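/*
   Probe sequence used above (and in the blocked variant below): starting
   from h1, scan forward to the end of the table, then wrap around to the
   beginning; the search fails only if it comes back to h1 without finding
   the key, i.e. (row,col) was not preallocated when the table was built.
*/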
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD   = baij->hd;
  HT   = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
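/*
   Sizing note for the table built above: ht_size = (int)(factor*nz), so
   the factor (baij->ht_fact when called from MatAssemblyEnd_MPIBAIJ())
   is an inverse load factor; a larger factor gives shorter probe
   sequences at the cost of memory.
*/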
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
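/*
   Viewing strategy used above for the parallel case: the whole matrix is
   first assembled onto the first process (an M-by-N MPIBAIJ matrix whose
   local part is empty on all other ranks) and then viewed through the
   singleton viewer, which serializes the output.
*/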
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
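/*
   MatMult above computes yy = A_diag*xx_local + B_offdiag*x_ghost: the
   scatter filling a->lvec with the needed ghost values of xx is overlapped
   with the multiply by the diagonal block a->A, and the off-diagonal
   contribution is added once the scatter completes.
*/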
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
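/*
   Ordering produced by MatGetRow above: first the entries of B whose
   global block column lies before cstart, then all entries of A, then the
   remaining entries of B; since A and B are individually sorted, the
   merged row comes back with increasing global column numbers.
*/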
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1630 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1631 /* Overlap communication with computation. */ 1632 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1633 } 1634 if (ll) { 1635 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1636 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1637 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1638 } 1639 /* scale the diagonal block */ 1640 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1641 1642 if (rr) { 1643 /* Do a scatter end and then right scale the off-diagonal block */ 1644 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1645 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1646 } 1647 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1653 int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag) 1654 { 1655 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1656 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1657 int *procs,*nprocs,j,idx,nsends,*work,row; 1658 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1659 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1660 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1661 MPI_Comm comm = A->comm; 1662 MPI_Request *send_waits,*recv_waits; 1663 MPI_Status recv_status,*send_status; 1664 IS istmp; 1665 PetscTruth found; 1666 1667 PetscFunctionBegin; 1668 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1669 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1670 1671 /* first count number of contributors to each processor */ 1672 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1673 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1674 procs = nprocs + size; 1675 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1676 for (i=0; i<N; i++) { 1677 idx = rows[i]; 1678 found = PETSC_FALSE; 1679 for (j=0; j<size; j++) { 1680 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1681 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1682 } 1683 } 1684 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1685 } 1686 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1687 1688 /* inform other processors of number of messages and max length*/ 1689 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1690 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1691 nmax = work[rank]; 1692 nrecvs = work[size+rank]; 1693 ierr = PetscFree(work);CHKERRQ(ierr); 1694 1695 /* post receives: */ 1696 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1697 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1698 for (i=0; i<nrecvs; i++) { 1699 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1700 } 1701 1702 /* do sends: 1703 1) starts[i] gives the starting index in svalues for stuff going to 1704 the ith processor 1705 */ 1706 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1707 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1708 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1709 starts[0] = 0; 1710 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1711 for (i=0; i<N; i++) { 1712 svalues[starts[owner[i]]++] = rows[i]; 
1713 } 1714 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1715 1716 starts[0] = 0; 1717 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1718 count = 0; 1719 for (i=0; i<size; i++) { 1720 if (procs[i]) { 1721 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1722 } 1723 } 1724 ierr = PetscFree(starts);CHKERRQ(ierr); 1725 1726 base = owners[rank]*bs; 1727 1728 /* wait on receives */ 1729 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1730 source = lens + nrecvs; 1731 count = nrecvs; slen = 0; 1732 while (count) { 1733 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1734 /* unpack receives into our local space */ 1735 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1736 source[imdex] = recv_status.MPI_SOURCE; 1737 lens[imdex] = n; 1738 slen += n; 1739 count--; 1740 } 1741 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1742 1743 /* move the received row indices into a single local list, shifted to local numbering */ 1744 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1745 count = 0; 1746 for (i=0; i<nrecvs; i++) { 1747 values = rvalues + i*nmax; 1748 for (j=0; j<lens[i]; j++) { 1749 lrows[count++] = values[j] - base; 1750 } 1751 } 1752 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1753 ierr = PetscFree(lens);CHKERRQ(ierr); 1754 ierr = PetscFree(owner);CHKERRQ(ierr); 1755 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1756 1757 /* actually zap the local rows */ 1758 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1759 PetscLogObjectParent(A,istmp); 1760 1761 /* 1762 Zero the required rows. If the "diagonal block" of the matrix 1763 is square and the user wishes to set the diagonal we use separate 1764 code so that MatSetValues() is not called for each diagonal entry, 1765 which would repeatedly allocate new memory (many mallocs) and slow things down.
1766 1767 Contributed by: Mathew Knepley 1768 */ 1769 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1770 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1771 if (diag && (l->A->M == l->A->N)) { 1772 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1773 } else if (diag) { 1774 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1775 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1776 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1777 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1778 } 1779 for (i=0; i<slen; i++) { 1780 row = lrows[i] + rstart_bs; 1781 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1782 } 1783 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1784 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1785 } else { 1786 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1787 } 1788 1789 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1790 ierr = PetscFree(lrows);CHKERRQ(ierr); 1791 1792 /* wait on sends */ 1793 if (nsends) { 1794 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1795 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1796 ierr = PetscFree(send_status);CHKERRQ(ierr); 1797 } 1798 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1799 ierr = PetscFree(svalues);CHKERRQ(ierr); 1800 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1806 int MatPrintHelp_MPIBAIJ(Mat A) 1807 { 1808 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1809 MPI_Comm comm = A->comm; 1810 static int called = 0; 1811 int ierr; 1812 1813 PetscFunctionBegin; 1814 if (!a->rank) { 1815 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1816 } 1817 if (called) {PetscFunctionReturn(0);} else called = 1; 1818 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1819 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1825 int MatSetUnfactored_MPIBAIJ(Mat A) 1826 { 1827 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1828 int ierr; 1829 1830 PetscFunctionBegin; 1831 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatEqual_MPIBAIJ" 1839 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1840 { 1841 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1842 Mat a,b,c,d; 1843 PetscTruth flg; 1844 int ierr; 1845 1846 PetscFunctionBegin; 1847 ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr); 1848 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1849 a = matA->A; b = matA->B; 1850 c = matB->A; d = matB->B; 1851 1852 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1853 if (flg == PETSC_TRUE) { 1854 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1855 } 1856 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1863 int MatSetUpPreallocation_MPIBAIJ(Mat A) 1864 { 1865 int ierr; 1866 1867 PetscFunctionBegin; 1868 ierr = 
MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1869 PetscFunctionReturn(0); 1870 } 1871 1872 /* -------------------------------------------------------------------*/ 1873 static struct _MatOps MatOps_Values = { 1874 MatSetValues_MPIBAIJ, 1875 MatGetRow_MPIBAIJ, 1876 MatRestoreRow_MPIBAIJ, 1877 MatMult_MPIBAIJ, 1878 MatMultAdd_MPIBAIJ, 1879 MatMultTranspose_MPIBAIJ, 1880 MatMultTransposeAdd_MPIBAIJ, 1881 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 0, 1887 0, 1888 MatTranspose_MPIBAIJ, 1889 MatGetInfo_MPIBAIJ, 1890 MatEqual_MPIBAIJ, 1891 MatGetDiagonal_MPIBAIJ, 1892 MatDiagonalScale_MPIBAIJ, 1893 MatNorm_MPIBAIJ, 1894 MatAssemblyBegin_MPIBAIJ, 1895 MatAssemblyEnd_MPIBAIJ, 1896 0, 1897 MatSetOption_MPIBAIJ, 1898 MatZeroEntries_MPIBAIJ, 1899 MatZeroRows_MPIBAIJ, 1900 0, 1901 0, 1902 0, 1903 0, 1904 MatSetUpPreallocation_MPIBAIJ, 1905 0, 1906 0, 1907 0, 1908 0, 1909 MatDuplicate_MPIBAIJ, 1910 0, 1911 0, 1912 0, 1913 0, 1914 0, 1915 MatGetSubMatrices_MPIBAIJ, 1916 MatIncreaseOverlap_MPIBAIJ, 1917 MatGetValues_MPIBAIJ, 1918 0, 1919 MatPrintHelp_MPIBAIJ, 1920 MatScale_MPIBAIJ, 1921 0, 1922 0, 1923 0, 1924 MatGetBlockSize_MPIBAIJ, 1925 0, 1926 0, 1927 0, 1928 0, 1929 0, 1930 0, 1931 MatSetUnfactored_MPIBAIJ, 1932 0, 1933 MatSetValuesBlocked_MPIBAIJ, 1934 0, 1935 MatDestroy_MPIBAIJ, 1936 MatView_MPIBAIJ, 1937 MatGetPetscMaps_Petsc, 1938 0, 1939 0, 1940 0, 1941 0, 1942 0, 1943 0, 1944 MatGetRowMax_MPIBAIJ}; 1945 1946 1947 EXTERN_C_BEGIN 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1950 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1951 { 1952 PetscFunctionBegin; 1953 *a = ((Mat_MPIBAIJ *)A->data)->A; 1954 *iscopy = PETSC_FALSE; 1955 PetscFunctionReturn(0); 1956 } 1957 EXTERN_C_END 1958 1959 EXTERN_C_BEGIN 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatCreate_MPIBAIJ" 1962 int MatCreate_MPIBAIJ(Mat B) 1963 { 1964 Mat_MPIBAIJ *b; 1965 int ierr; 1966 PetscTruth flg; 1967 1968 PetscFunctionBegin; 1969 1970 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 1971 B->data = (void*)b; 1972 1973 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1974 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1975 B->mapping = 0; 1976 B->factor = 0; 1977 B->assembled = PETSC_FALSE; 1978 1979 B->insertmode = NOT_SET_VALUES; 1980 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1981 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1982 1983 /* build local table of row and column ownerships */ 1984 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1985 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1986 b->cowners = b->rowners + b->size + 2; 1987 b->rowners_bs = b->cowners + b->size + 2; 1988 1989 /* build cache for off array entries formed */ 1990 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1991 b->donotstash = PETSC_FALSE; 1992 b->colmap = PETSC_NULL; 1993 b->garray = PETSC_NULL; 1994 b->roworiented = PETSC_TRUE; 1995 1996 #if defined(PETSC_USE_MAT_SINGLE) 1997 /* stuff for MatSetValues_XXX in single precision */ 1998 b->setvalueslen = 0; 1999 b->setvaluescopy = PETSC_NULL; 2000 #endif 2001 2002 /* stuff used in block assembly */ 2003 b->barray = 0; 2004 2005 /* stuff used for matrix vector multiply */ 2006 b->lvec = 0; 2007 b->Mvctx = 0; 2008 2009 /* stuff for MatGetRow() */ 2010 b->rowindices = 0; 2011 b->rowvalues = 0; 2012 b->getrowactive = PETSC_FALSE; 2013 2014 /* hash table stuff */ 2015 
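  /* ht/hd hold the optional hash table used to make matrix assembly more
     efficient; it is enabled with MAT_USE_HASH_TABLE (or -mat_use_hash_table),
     ht_fact is the factor used in computing the table size (see
     MatMPIBAIJSetHashTableFactor()), and ht_total_ct/ht_insert_ct are usage
     counters */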
b->ht = 0; 2016 b->hd = 0; 2017 b->ht_size = 0; 2018 b->ht_flag = PETSC_FALSE; 2019 b->ht_fact = 0; 2020 b->ht_total_ct = 0; 2021 b->ht_insert_ct = 0; 2022 2023 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2024 if (flg) { 2025 PetscReal fact = 1.39; 2026 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2027 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2028 if (fact <= 1.0) fact = 1.39; 2029 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2030 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2031 } 2032 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2033 "MatStoreValues_MPIBAIJ", 2034 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2035 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2036 "MatRetrieveValues_MPIBAIJ", 2037 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2038 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2039 "MatGetDiagonalBlock_MPIBAIJ", 2040 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 EXTERN_C_END 2044 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2047 /*@C 2048 MatMPIBAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in block AIJ format 2049 (block compressed row). For good matrix assembly performance 2050 the user should preallocate the matrix storage by setting the parameters 2051 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2052 performance can be increased by more than a factor of 50. 2053 2054 Collective on Mat 2055 2056 Input Parameters: 2057 + A - the matrix 2058 . bs - size of block 2059 . d_nz - number of block nonzeros per block row in diagonal portion of local 2060 submatrix (same for all local rows) 2061 . d_nnz - array containing the number of block nonzeros in the various block rows 2062 in the diagonal portion of the local submatrix (possibly different for each block 2063 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2064 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2065 submatrix (same for all local rows). 2066 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2067 off-diagonal portion of the local submatrix (possibly different for 2068 each block row) or PETSC_NULL. 2069 2070 2071 2072 2073 Options Database Keys: 2074 . -mat_no_unroll - uses code that does not unroll the loops in the 2075 block calculations (much slower) 2076 . -mat_block_size - size of the blocks to use 2077 2078 Notes: 2079 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2080 then it must be used on all processors that share the object for that argument. 2081 2082 Storage Information: 2083 For a square global matrix we define each processor's diagonal portion 2084 to be its local rows and the corresponding columns (a square submatrix); 2085 each processor's off-diagonal portion encompasses the remainder of the 2086 local matrix (a rectangular submatrix). 2087 2088 The user can specify preallocated storage for the diagonal part of 2089 the local submatrix with either d_nz or d_nnz (not both). Set 2090 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2091 memory allocation. Likewise, specify preallocated storage for the 2092 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
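   For example, the following preallocates a 2x2-blocked parallel matrix with
   5 diagonal and 2 off-diagonal block nonzeros per block row (a minimal
   sketch; the communicator comm and the local/global sizes m, n, M, and N
   are assumed to be supplied by the caller):

.vb
      Mat A;
      int ierr;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve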
2093 2094 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2095 the figure below we depict these three local rows and all columns (0-11). 2096 2097 .vb 2098 0 1 2 3 4 5 6 7 8 9 10 11 2099 ------------------- 2100 row 3 | o o o d d d o o o o o o 2101 row 4 | o o o d d d o o o o o o 2102 row 5 | o o o d d d o o o o o o 2103 ------------------- 2104 .ve 2105 2106 Thus, any entries in the d locations are stored in the d (diagonal) 2107 submatrix, and any entries in the o locations are stored in the 2108 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2109 stored simply in the MATSEQBAIJ format for compressed row storage. 2110 2111 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2112 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2113 In general, for PDE problems in which most nonzeros are near the diagonal, 2114 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2115 or you will get TERRIBLE performance; see the users' manual chapter on 2116 matrices. 2117 2118 Level: intermediate 2119 2120 .keywords: matrix, block, aij, compressed row, sparse, parallel 2121 2122 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2123 @*/ 2124 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2125 { 2126 Mat_MPIBAIJ *b; 2127 int ierr,i; 2128 PetscTruth flg2; 2129 2130 PetscFunctionBegin; 2131 ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 2132 if (!flg2) PetscFunctionReturn(0); 2133 2134 B->preallocated = PETSC_TRUE; 2135 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2136 2137 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2138 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2139 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2140 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2141 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2142 if (d_nnz) { 2143 for (i=0; i<B->m/bs; i++) { 2144 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2145 } 2146 } 2147 if (o_nnz) { 2148 for (i=0; i<B->m/bs; i++) { 2149 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2150 } 2151 } 2152 2153 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2154 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2155 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2156 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2157 2158 b = (Mat_MPIBAIJ*)B->data; 2159 b->bs = bs; 2160 b->bs2 = bs*bs; 2161 b->mbs = B->m/bs; 2162 b->nbs = B->n/bs; 2163 b->Mbs = B->M/bs; 2164 b->Nbs = B->N/bs; 2165 2166 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2167 b->rowners[0] = 0; 2168 for (i=2; i<=b->size; i++) { 2169 b->rowners[i] += b->rowners[i-1]; 2170 } 2171 b->rstart = b->rowners[b->rank]; 2172 b->rend = b->rowners[b->rank+1]; 2173 2174 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2175 b->cowners[0] = 0; 2176 for (i=2; i<=b->size; i++) { 2177 b->cowners[i] += b->cowners[i-1]; 2178 } 2179 b->cstart = b->cowners[b->rank]; 2180 b->cend =
b->cowners[b->rank+1]; 2181 2182 for (i=0; i<=b->size; i++) { 2183 b->rowners_bs[i] = b->rowners[i]*bs; 2184 } 2185 b->rstart_bs = b->rstart*bs; 2186 b->rend_bs = b->rend*bs; 2187 b->cstart_bs = b->cstart*bs; 2188 b->cend_bs = b->cend*bs; 2189 2190 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2191 PetscLogObjectParent(B,b->A); 2192 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2193 PetscLogObjectParent(B,b->B); 2194 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2195 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatCreateMPIBAIJ" 2201 /*@C 2202 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2203 (block compressed row). For good matrix assembly performance 2204 the user should preallocate the matrix storage by setting the parameters 2205 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2206 performance can be increased by more than a factor of 50. 2207 2208 Collective on MPI_Comm 2209 2210 Input Parameters: 2211 + comm - MPI communicator 2212 . bs - size of block 2213 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2214 This value should be the same as the local size used in creating the 2215 y vector for the matrix-vector product y = Ax. 2216 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2217 This value should be the same as the local size used in creating the 2218 x vector for the matrix-vector product y = Ax. 2219 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2220 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2221 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2222 submatrix (same for all local rows) 2223 . d_nnz - array containing the number of nonzero blocks in the various block rows 2224 in the diagonal portion of the local submatrix (possibly different for each block 2225 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2226 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2227 submatrix (same for all local rows). 2228 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2229 off-diagonal portion of the local submatrix (possibly different for 2230 each block row) or PETSC_NULL. 2231 2232 Output Parameter: 2233 . A - the matrix 2234 2235 Options Database Keys: 2236 . -mat_no_unroll - uses code that does not unroll the loops in the 2237 block calculations (much slower) 2238 . -mat_block_size - size of the blocks to use 2239 2240 Notes: 2241 A nonzero block is any block that has 1 or more nonzeros in it 2242 2243 The user MUST specify either the local or global matrix dimensions 2244 (possibly both). 2245 2246 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2247 then it must be used on all processors that share the object for that argument. 2248 2249 Storage Information: 2250 For a square global matrix we define each processor's diagonal portion 2251 to be its local rows and the corresponding columns (a square submatrix); 2252 each processor's off-diagonal portion encompasses the remainder of the 2253 local matrix (a rectangular submatrix). 2254 2255 The user can specify preallocated storage for the diagonal part of 2256 the local submatrix with either d_nz or d_nnz (not both). 
Set 2257 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2258 memory allocation. Likewise, specify preallocated storage for the 2259 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2260 2261 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2262 the figure below we depict these three local rows and all columns (0-11). 2263 2264 .vb 2265 0 1 2 3 4 5 6 7 8 9 10 11 2266 ------------------- 2267 row 3 | o o o d d d o o o o o o 2268 row 4 | o o o d d d o o o o o o 2269 row 5 | o o o d d d o o o o o o 2270 ------------------- 2271 .ve 2272 2273 Thus, any entries in the d locations are stored in the d (diagonal) 2274 submatrix, and any entries in the o locations are stored in the 2275 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2276 stored simply in the MATSEQBAIJ format for compressed row storage. 2277 2278 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2279 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2280 In general, for PDE problems in which most nonzeros are near the diagonal, 2281 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2282 or you will get TERRIBLE performance; see the users' manual chapter on 2283 matrices. 2284 2285 Level: intermediate 2286 2287 .keywords: matrix, block, aij, compressed row, sparse, parallel 2288 2289 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2290 @*/ 2291 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2292 { 2293 int ierr,size; 2294 2295 PetscFunctionBegin; 2296 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2297 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2298 if (size > 1) { 2299 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2300 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2301 } else { 2302 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2303 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2304 } 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #undef __FUNCT__ 2309 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2310 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2311 { 2312 Mat mat; 2313 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2314 int ierr,len=0; 2315 2316 PetscFunctionBegin; 2317 *newmat = 0; 2318 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2319 ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr); 2320 mat->preallocated = PETSC_TRUE; 2321 mat->assembled = PETSC_TRUE; 2322 a = (Mat_MPIBAIJ*)mat->data; 2323 a->bs = oldmat->bs; 2324 a->bs2 = oldmat->bs2; 2325 a->mbs = oldmat->mbs; 2326 a->nbs = oldmat->nbs; 2327 a->Mbs = oldmat->Mbs; 2328 a->Nbs = oldmat->Nbs; 2329 2330 a->rstart = oldmat->rstart; 2331 a->rend = oldmat->rend; 2332 a->cstart = oldmat->cstart; 2333 a->cend = oldmat->cend; 2334 a->size = oldmat->size; 2335 a->rank = oldmat->rank; 2336 a->donotstash = oldmat->donotstash; 2337 a->roworiented = oldmat->roworiented; 2338 a->rowindices = 0; 2339 a->rowvalues = 0; 2340 a->getrowactive = PETSC_FALSE; 2341 a->barray = 0; 2342 a->rstart_bs = oldmat->rstart_bs; 2343 a->rend_bs = oldmat->rend_bs; 2344 a->cstart_bs = oldmat->cstart_bs; 2345 a->cend_bs = oldmat->cend_bs; 2346 2347 /* hash table stuff */ 2348 a->ht = 0; 2349 a->hd = 0; 2350 a->ht_size = 0; 2351 a->ht_flag = oldmat->ht_flag; 2352 a->ht_fact = oldmat->ht_fact; 2353 
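  /* the hash table itself is not duplicated: it stays empty here, and only the
     ht_flag and ht_fact settings carry over from oldmat */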
a->ht_total_ct = 0; 2354 a->ht_insert_ct = 0; 2355 2356 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2357 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2358 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2359 if (oldmat->colmap) { 2360 #if defined (PETSC_USE_CTABLE) 2361 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2362 #else 2363 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2364 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2365 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2366 #endif 2367 } else a->colmap = 0; 2368 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2369 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2370 PetscLogObjectMemory(mat,len*sizeof(int)); 2371 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2372 } else a->garray = 0; 2373 2374 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2375 PetscLogObjectParent(mat,a->lvec); 2376 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2377 2378 PetscLogObjectParent(mat,a->Mvctx); 2379 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2380 PetscLogObjectParent(mat,a->A); 2381 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2382 PetscLogObjectParent(mat,a->B); 2383 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2384 *newmat = mat; 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #include "petscsys.h" 2389 2390 EXTERN_C_BEGIN 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatLoad_MPIBAIJ" 2393 int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2394 { 2395 Mat A; 2396 int i,nz,ierr,j,rstart,rend,fd; 2397 PetscScalar *vals,*buf; 2398 MPI_Comm comm = ((PetscObject)viewer)->comm; 2399 MPI_Status status; 2400 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2401 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2402 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2403 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2404 int dcount,kmax,k,nzcount,tmp; 2405 2406 PetscFunctionBegin; 2407 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2408 2409 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2410 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2411 if (!rank) { 2412 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2413 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2414 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2415 if (header[3] < 0) { 2416 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2417 } 2418 } 2419 2420 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2421 M = header[1]; N = header[2]; 2422 2423 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2424 2425 /* 2426 This code adds extra rows to make sure the number of rows is 2427 divisible by the blocksize 2428 */ 2429 Mbs = M/bs; 2430 extra_rows = bs - M + bs*(Mbs); 2431 if (extra_rows == bs) extra_rows = 0; 2432 else Mbs++; 2433 if (extra_rows &&!rank) { 2434 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2435 } 2436 2437 /* determine ownership of all rows */ 2438 mbs = Mbs/size + ((Mbs % size) > rank); 2439 m = mbs*bs; 2440 ierr = 
PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2441 browners = rowners + size + 1; 2442 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2443 rowners[0] = 0; 2444 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2445 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2446 rstart = rowners[rank]; 2447 rend = rowners[rank+1]; 2448 2449 /* distribute row lengths to all processors */ 2450 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2451 if (!rank) { 2452 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2453 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2454 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2455 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2456 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2457 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2458 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2459 } else { 2460 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2461 } 2462 2463 if (!rank) { 2464 /* calculate the number of nonzeros on each processor */ 2465 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2466 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2467 for (i=0; i<size; i++) { 2468 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2469 procsnz[i] += rowlengths[j]; 2470 } 2471 } 2472 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2473 2474 /* determine max buffer needed and allocate it */ 2475 maxnz = 0; 2476 for (i=0; i<size; i++) { 2477 maxnz = PetscMax(maxnz,procsnz[i]); 2478 } 2479 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2480 2481 /* read in my part of the matrix column indices */ 2482 nz = procsnz[0]; 2483 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2484 mycols = ibuf; 2485 if (size == 1) nz -= extra_rows; 2486 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2487 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2488 2489 /* read in the other processors' parts (except the last) and ship them off */ 2490 for (i=1; i<size-1; i++) { 2491 nz = procsnz[i]; 2492 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2493 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2494 } 2495 /* read in the stuff for the last proc */ 2496 if (size != 1) { 2497 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2498 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2499 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2500 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2501 } 2502 ierr = PetscFree(cols);CHKERRQ(ierr); 2503 } else { 2504 /* determine buffer space needed for message */ 2505 nz = 0; 2506 for (i=0; i<m; i++) { 2507 nz += locrowlens[i]; 2508 } 2509 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2510 mycols = ibuf; 2511 /* receive message of column indices */ 2512 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2513 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2514 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2515 } 2516 2517 /* loop over local rows, determining number of off diagonal entries */ 2518 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2519 odlens = dlens + (rend-rstart); 2520 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2521 ierr =
PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2522 masked1 = mask + Mbs; 2523 masked2 = masked1 + Mbs; 2524 rowcount = 0; nzcount = 0; 2525 for (i=0; i<mbs; i++) { 2526 dcount = 0; 2527 odcount = 0; 2528 for (j=0; j<bs; j++) { 2529 kmax = locrowlens[rowcount]; 2530 for (k=0; k<kmax; k++) { 2531 tmp = mycols[nzcount++]/bs; 2532 if (!mask[tmp]) { 2533 mask[tmp] = 1; 2534 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2535 else masked1[dcount++] = tmp; 2536 } 2537 } 2538 rowcount++; 2539 } 2540 2541 dlens[i] = dcount; 2542 odlens[i] = odcount; 2543 2544 /* zero out the mask elements we set */ 2545 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2546 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2547 } 2548 2549 /* create our matrix */ 2550 ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2551 A = *newmat; 2552 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2553 2554 if (!rank) { 2555 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2556 /* read in my part of the matrix numerical values */ 2557 nz = procsnz[0]; 2558 vals = buf; 2559 mycols = ibuf; 2560 if (size == 1) nz -= extra_rows; 2561 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2562 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2563 2564 /* insert into matrix */ 2565 jj = rstart*bs; 2566 for (i=0; i<m; i++) { 2567 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2568 mycols += locrowlens[i]; 2569 vals += locrowlens[i]; 2570 jj++; 2571 } 2572 /* read in other processors (except the last one) and ship out */ 2573 for (i=1; i<size-1; i++) { 2574 nz = procsnz[i]; 2575 vals = buf; 2576 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2577 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2578 } 2579 /* the last proc */ 2580 if (size != 1){ 2581 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2582 vals = buf; 2583 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2584 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2585 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2586 } 2587 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2588 } else { 2589 /* receive numeric values */ 2590 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2591 2592 /* receive message of values */ 2593 vals = buf; 2594 mycols = ibuf; 2595 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2596 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2597 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2598 2599 /* insert into matrix */ 2600 jj = rstart*bs; 2601 for (i=0; i<m; i++) { 2602 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2603 mycols += locrowlens[i]; 2604 vals += locrowlens[i]; 2605 jj++; 2606 } 2607 } 2608 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2609 ierr = PetscFree(buf);CHKERRQ(ierr); 2610 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2611 ierr = PetscFree(rowners);CHKERRQ(ierr); 2612 ierr = PetscFree(dlens);CHKERRQ(ierr); 2613 ierr = PetscFree(mask);CHKERRQ(ierr); 2614 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2615 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 EXTERN_C_END 2619 2620 #undef __FUNCT__ 2621 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2622 /*@ 2623 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2624 2625 Collective on Mat 2626 2627 Input Parameters: 2628 + mat - the matrix 2629 - fact - factor used in computing the size of the hash table (the default used when set from the command line is 1.39) 2630 2631 Level: advanced 2632 2633 Notes: 2634 This can also be set by the command line option: -mat_use_hash_table <fact> 2635 2636 .keywords: matrix, hashtable, factor, HT 2637 2638 .seealso: MatSetOption() 2639 @*/ 2640 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2641 { 2642 Mat_MPIBAIJ *baij; 2643 int ierr; 2644 PetscTruth flg; 2645 2646 PetscFunctionBegin; 2647 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2648 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr); 2649 if (!flg) { 2650 SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only."); 2651 } 2652 baij = (Mat_MPIBAIJ*)mat->data; 2653 baij->ht_fact = fact; 2654 PetscFunctionReturn(0); 2655 } 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2659 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2660 { 2661 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2662 PetscFunctionBegin; 2663 *Ad = a->A; 2664 *Ao = a->B; 2665 *colmap = a->garray; 2666 PetscFunctionReturn(0); 2667 } 2668
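/*
   Example use of MatMPIBAIJGetSeqBAIJ() (a minimal sketch; A is assumed to be
   an assembled MATMPIBAIJ matrix):

      Mat Ad,Ao;
      int *garray,ierr;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad and Ao are the internal sequential "diagonal" and off-diagonal blocks,
   not copies, so the caller must not destroy them; garray[i] is the global
   block column corresponding to block column i of Ao.
*/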