1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 PetscErrorCode ierr; 110 PetscInt nbs = B->nbs,i,bs=B->bs; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120 PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatSetValues_MPIBAIJ" 285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt i,N = m*n; 290 MatScalar *vsingle; 291 292 PetscFunctionBegin; 293 if (N > b->setvalueslen) { 294 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 295 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 296 b->setvalueslen = N; 297 } 298 vsingle = b->setvaluescopy; 299 300 for (i=0; i<N; i++) { 301 vsingle[i] = v[i]; 302 } 303 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 310 { 311 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 312 PetscErrorCode ierr; 313 PetscInt i,N = m*n*b->bs2; 314 MatScalar *vsingle; 315 316 PetscFunctionBegin; 317 if (N > b->setvalueslen) { 318 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 319 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 320 b->setvalueslen = N; 321 } 322 vsingle = b->setvaluescopy; 323 for (i=0; i<N; i++) { 324 vsingle[i] = v[i]; 325 } 326 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 333 { 334 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 335 PetscErrorCode ierr; 336 PetscInt i,N = m*n; 337 MatScalar *vsingle; 338 339 PetscFunctionBegin; 340 if (N > b->setvalueslen) { 341 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 342 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 343 b->setvalueslen = N; 344 } 345 vsingle = b->setvaluescopy; 346 for (i=0; i<N; i++) { 347 vsingle[i] = v[i]; 348 } 349 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 356 { 357 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 358 PetscErrorCode ierr; 359 PetscInt i,N = m*n*b->bs2; 360 MatScalar *vsingle; 361 362 PetscFunctionBegin; 363 if (N > b->setvalueslen) { 364 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 365 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 366 b->setvalueslen = N; 367 } 368 vsingle = b->setvaluescopy; 369 for (i=0; i<N; i++) { 370 vsingle[i] = v[i]; 371 } 372 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 #endif 376 377 #undef __FUNCT__ 378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 380 { 381 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 382 MatScalar value; 383 PetscTruth roworiented = baij->roworiented; 384 PetscErrorCode ierr; 385 PetscInt i,j,row,col; 386 PetscInt rstart_orig=baij->rstart_bs; 387 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 388 PetscInt cend_orig=baij->cend_bs,bs=baij->bs; 389 390 /* Some Variables required in the macro */ 391 Mat A = baij->A; 392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 393 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 394 MatScalar *aa=a->a; 395 396 Mat B = baij->B; 397 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 398 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 399 MatScalar *ba=b->a; 400 401 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 402 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 403 MatScalar *ap,*bap; 404 405 PetscFunctionBegin; 406 for (i=0; i<m; i++) { 407 if (im[i] < 0) continue; 408 #if defined(PETSC_USE_BOPT_g) 409 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 410 #endif 411 if (im[i] >= rstart_orig && im[i] < rend_orig) { 412 row = im[i] - rstart_orig; 413 for (j=0; j<n; j++) { 414 if (in[j] >= cstart_orig && in[j] < cend_orig){ 415 col = in[j] - cstart_orig; 416 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 417 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 418 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 419 } else if (in[j] < 0) continue; 420 #if defined(PETSC_USE_BOPT_g) 421 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 422 #endif 423 else { 424 if (mat->was_assembled) { 425 if (!baij->colmap) { 426 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 427 } 428 #if defined (PETSC_USE_CTABLE) 429 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 430 col = col - 1; 431 #else 432 col = baij->colmap[in[j]/bs] - 1; 433 #endif 434 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 435 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 436 col = in[j]; 437 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 438 B = baij->B; 439 b = (Mat_SeqBAIJ*)(B)->data; 440 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 441 ba=b->a; 442 } else col += in[j]%bs; 443 } else col = in[j]; 444 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 445 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 446 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 447 } 448 } 449 } else { 450 if (!baij->donotstash) { 451 if (roworiented) { 452 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453 } else { 454 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 455 } 456 } 457 } 458 } 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 const MatScalar *value; 468 MatScalar *barray=baij->barray; 469 PetscTruth roworiented = baij->roworiented; 470 PetscErrorCode ierr; 471 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 472 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 473 PetscInt cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 474 475 PetscFunctionBegin; 476 if(!barray) { 477 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 478 baij->barray = barray; 479 } 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 if (im[i] < 0) continue; 488 #if defined(PETSC_USE_BOPT_g) 489 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 if (im[i] >= rstart && im[i] < rend) { 492 row = im[i] - rstart; 493 for (j=0; j<n; j++) { 494 /* If NumCol = 1 then a copy is not required */ 495 if ((roworiented) && (n == 1)) { 496 barray = (MatScalar*)v + i*bs2; 497 } else if((!roworiented) && (m == 1)) { 498 barray = (MatScalar*)v + j*bs2; 499 } else { /* Here a copy is required */ 500 if (roworiented) { 501 value = v + i*(stepval+bs)*bs + j*bs; 502 } else { 503 value = v + j*(stepval+bs)*bs + i*bs; 504 } 505 for (ii=0; ii<bs; ii++,value+=stepval) { 506 for (jj=0; jj<bs; jj++) { 507 *barray++ = *value++; 508 } 509 } 510 barray -=bs2; 511 } 512 513 if (in[j] >= cstart && in[j] < cend){ 514 col = in[j] - cstart; 515 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 else if (in[j] < 0) continue; 518 #if defined(PETSC_USE_BOPT_g) 519 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 520 #endif 521 else { 522 if (mat->was_assembled) { 523 if (!baij->colmap) { 524 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 525 } 526 527 #if defined(PETSC_USE_BOPT_g) 528 #if defined (PETSC_USE_CTABLE) 529 { PetscInt data; 530 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 531 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 532 } 533 #else 534 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 535 #endif 536 #endif 537 #if defined (PETSC_USE_CTABLE) 538 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 539 col = (col - 1)/bs; 540 #else 541 col = (baij->colmap[in[j]] - 1)/bs; 542 #endif 543 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 544 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 545 col = in[j]; 546 } 547 } 548 else col = in[j]; 549 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 550 } 551 } 552 } else { 553 if (!baij->donotstash) { 554 if (roworiented) { 555 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556 } else { 557 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #define HASH_KEY 0.6180339887 566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 572 { 573 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 574 PetscTruth roworiented = baij->roworiented; 575 PetscErrorCode ierr; 576 PetscInt i,j,row,col; 577 PetscInt rstart_orig=baij->rstart_bs; 578 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 579 PetscInt h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 580 PetscReal tmp; 581 MatScalar **HD = baij->hd,value; 582 #if defined(PETSC_USE_BOPT_g) 583 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 584 #endif 585 586 PetscFunctionBegin; 587 588 for (i=0; i<m; i++) { 589 #if defined(PETSC_USE_BOPT_g) 590 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 591 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 592 #endif 593 row = im[i]; 594 if (row >= rstart_orig && row < rend_orig) { 595 for (j=0; j<n; j++) { 596 col = in[j]; 597 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 598 /* Look up PetscInto the Hash Table */ 599 key = (row/bs)*Nbs+(col/bs)+1; 600 h1 = HASH(size,key,tmp); 601 602 603 idx = h1; 604 #if defined(PETSC_USE_BOPT_g) 605 insert_ct++; 606 total_ct++; 607 if (HT[idx] != key) { 608 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 609 if (idx == size) { 610 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 611 if (idx == h1) { 612 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 613 } 614 } 615 } 616 #else 617 if (HT[idx] != key) { 618 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 619 if (idx == size) { 620 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 621 if (idx == h1) { 622 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623 } 624 } 625 } 626 #endif 627 /* A HASH table entry is found, so insert the values at the correct address */ 628 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 629 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 630 } 631 } else { 632 if (!baij->donotstash) { 633 if (roworiented) { 634 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 635 } else { 636 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 637 } 638 } 639 } 640 } 641 #if defined(PETSC_USE_BOPT_g) 642 baij->ht_total_ct = total_ct; 643 baij->ht_insert_ct = insert_ct; 644 #endif 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 651 { 652 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 653 PetscTruth roworiented = baij->roworiented; 654 PetscErrorCode ierr; 655 PetscInt i,j,ii,jj,row,col; 656 PetscInt rstart=baij->rstart ; 657 PetscInt rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 658 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659 PetscReal tmp; 660 MatScalar **HD = baij->hd,*baij_a; 661 const MatScalar *v_t,*value; 662 #if defined(PETSC_USE_BOPT_g) 663 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 664 #endif 665 666 PetscFunctionBegin; 667 668 if (roworiented) { 669 stepval = (n-1)*bs; 670 } else { 671 stepval = (m-1)*bs; 672 } 673 for (i=0; i<m; i++) { 674 #if defined(PETSC_USE_BOPT_g) 675 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 676 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 677 #endif 678 row = im[i]; 679 v_t = v + i*bs2; 680 if (row >= rstart && row < rend) { 681 for (j=0; j<n; j++) { 682 col = in[j]; 683 684 /* Look up into the Hash Table */ 685 key = row*Nbs+col+1; 686 h1 = HASH(size,key,tmp); 687 688 idx = h1; 689 #if defined(PETSC_USE_BOPT_g) 690 total_ct++; 691 insert_ct++; 692 if (HT[idx] != key) { 693 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694 if (idx == size) { 695 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696 if (idx == h1) { 697 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 698 } 699 } 700 } 701 #else 702 if (HT[idx] != key) { 703 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704 if (idx == size) { 705 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706 if (idx == h1) { 707 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 708 } 709 } 710 } 711 #endif 712 baij_a = HD[idx]; 713 if (roworiented) { 714 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715 /* value = v + (i*(stepval+bs)+j)*bs; */ 716 value = v_t; 717 v_t += bs; 718 if (addv == ADD_VALUES) { 719 for (ii=0; ii<bs; ii++,value+=stepval) { 720 for (jj=ii; jj<bs2; jj+=bs) { 721 baij_a[jj] += *value++; 722 } 723 } 724 } else { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 baij_a[jj] = *value++; 728 } 729 } 730 } 731 } else { 732 value = v + j*(stepval+bs)*bs + i*bs; 733 if (addv == ADD_VALUES) { 734 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 735 for (jj=0; jj<bs; jj++) { 736 baij_a[jj] += *value++; 737 } 738 } 739 } else { 740 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741 for (jj=0; jj<bs; jj++) { 742 baij_a[jj] = *value++; 743 } 744 } 745 } 746 } 747 } 748 } else { 749 if (!baij->donotstash) { 750 if (roworiented) { 751 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 752 } else { 753 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 754 } 755 } 756 } 757 } 758 #if defined(PETSC_USE_BOPT_g) 759 baij->ht_total_ct = total_ct; 760 baij->ht_insert_ct = insert_ct; 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetValues_MPIBAIJ" 767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 770 PetscErrorCode ierr; 771 PetscInt bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 772 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773 774 PetscFunctionBegin; 775 for (i=0; i<m; i++) { 776 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 777 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779 row = idxm[i] - bsrstart; 780 for (j=0; j<n; j++) { 781 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 782 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783 if (idxn[j] >= bscstart && idxn[j] < bscend){ 784 col = idxn[j] - bscstart; 785 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } else { 787 if (!baij->colmap) { 788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789 } 790 #if defined (PETSC_USE_CTABLE) 791 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792 data --; 793 #else 794 data = baij->colmap[idxn[j]/bs]-1; 795 #endif 796 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797 else { 798 col = data + idxn[j]%bs; 799 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800 } 801 } 802 } 803 } else { 804 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatNorm_MPIBAIJ" 812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813 { 814 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816 PetscErrorCode ierr; 817 PetscInt i,bs2=baij->bs2; 818 PetscReal sum = 0.0; 819 MatScalar *v; 820 821 PetscFunctionBegin; 822 if (baij->size == 1) { 823 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 v = amat->a; 827 for (i=0; i<amat->nz*bs2; i++) { 828 #if defined(PETSC_USE_COMPLEX) 829 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 830 #else 831 sum += (*v)*(*v); v++; 832 #endif 833 } 834 v = bmat->a; 835 for (i=0; i<bmat->nz*bs2; i++) { 836 #if defined(PETSC_USE_COMPLEX) 837 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838 #else 839 sum += (*v)*(*v); v++; 840 #endif 841 } 842 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843 *nrm = sqrt(*nrm); 844 } else { 845 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 846 } 847 } 848 PetscFunctionReturn(0); 849 } 850 851 852 /* 853 Creates the hash table, and sets the table 854 This table is created only once. 855 If new entried need to be added to the matrix 856 then the hash table has to be destroyed and 857 recreated. 858 */ 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 862 { 863 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 864 Mat A = baij->A,B=baij->B; 865 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 866 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 867 PetscErrorCode ierr; 868 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 869 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 870 PetscInt *HT,key; 871 MatScalar **HD; 872 PetscReal tmp; 873 #if defined(PETSC_USE_BOPT_g) 874 PetscInt ct=0,max=0; 875 #endif 876 877 PetscFunctionBegin; 878 baij->ht_size=(PetscInt)(factor*nz); 879 size = baij->ht_size; 880 881 if (baij->ht) { 882 PetscFunctionReturn(0); 883 } 884 885 /* Allocate Memory for Hash Table */ 886 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 887 baij->ht = (PetscInt*)(baij->hd + size); 888 HD = baij->hd; 889 HT = baij->ht; 890 891 892 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 893 894 895 /* Loop Over A */ 896 for (i=0; i<a->mbs; i++) { 897 for (j=ai[i]; j<ai[i+1]; j++) { 898 row = i+rstart; 899 col = aj[j]+cstart; 900 901 key = row*Nbs + col + 1; 902 h1 = HASH(size,key,tmp); 903 for (k=0; k<size; k++){ 904 if (!HT[(h1+k)%size]) { 905 HT[(h1+k)%size] = key; 906 HD[(h1+k)%size] = a->a + j*bs2; 907 break; 908 #if defined(PETSC_USE_BOPT_g) 909 } else { 910 ct++; 911 #endif 912 } 913 } 914 #if defined(PETSC_USE_BOPT_g) 915 if (k> max) max = k; 916 #endif 917 } 918 } 919 /* Loop Over B */ 920 for (i=0; i<b->mbs; i++) { 921 for (j=bi[i]; j<bi[i+1]; j++) { 922 row = i+rstart; 923 col = garray[bj[j]]; 924 key = row*Nbs + col + 1; 925 h1 = HASH(size,key,tmp); 926 for (k=0; k<size; k++){ 927 if (!HT[(h1+k)%size]) { 928 HT[(h1+k)%size] = key; 929 HD[(h1+k)%size] = b->a + j*bs2; 930 break; 931 #if defined(PETSC_USE_BOPT_g) 932 } else { 933 ct++; 934 #endif 935 } 936 } 937 #if defined(PETSC_USE_BOPT_g) 938 if (k> max) max = k; 939 #endif 940 } 941 } 942 943 /* Print Summary */ 944 #if defined(PETSC_USE_BOPT_g) 945 for (i=0,j=0; i<size; i++) { 946 if (HT[i]) {j++;} 947 } 948 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max); 949 #endif 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 956 { 957 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 958 PetscErrorCode ierr; 959 PetscInt nstash,reallocs; 960 InsertMode addv; 961 962 PetscFunctionBegin; 963 if (baij->donotstash) { 964 PetscFunctionReturn(0); 965 } 966 967 /* make sure all processors are either in INSERTMODE or ADDMODE */ 968 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 969 if (addv == (ADD_VALUES|INSERT_VALUES)) { 970 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 971 } 972 mat->insertmode = addv; /* in case this processor had no cache */ 973 974 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 975 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 976 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 977 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs); 978 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 979 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 985 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 986 { 987 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 988 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 989 PetscErrorCode ierr; 990 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 991 PetscInt *row,*col,other_disassembled; 992 PetscTruth r1,r2,r3; 993 MatScalar *val; 994 InsertMode addv = mat->insertmode; 995 PetscMPIInt n; 996 997 PetscFunctionBegin; 998 if (!baij->donotstash) { 999 while (1) { 1000 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1001 if (!flg) break; 1002 1003 for (i=0; i<n;) { 1004 /* Now identify the consecutive vals belonging to the same row */ 1005 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1006 if (j < n) ncols = j-i; 1007 else ncols = n-i; 1008 /* Now assemble all these values with a single function call */ 1009 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1010 i = j; 1011 } 1012 } 1013 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1014 /* Now process the block-stash. Since the values are stashed column-oriented, 1015 set the roworiented flag to column oriented, and after MatSetValues() 1016 restore the original flags */ 1017 r1 = baij->roworiented; 1018 r2 = a->roworiented; 1019 r3 = b->roworiented; 1020 baij->roworiented = PETSC_FALSE; 1021 a->roworiented = PETSC_FALSE; 1022 b->roworiented = PETSC_FALSE; 1023 while (1) { 1024 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1025 if (!flg) break; 1026 1027 for (i=0; i<n;) { 1028 /* Now identify the consecutive vals belonging to the same row */ 1029 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1030 if (j < n) ncols = j-i; 1031 else ncols = n-i; 1032 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1033 i = j; 1034 } 1035 } 1036 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1037 baij->roworiented = r1; 1038 a->roworiented = r2; 1039 b->roworiented = r3; 1040 } 1041 1042 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1044 1045 /* determine if any processor has disassembled, if so we must 1046 also disassemble ourselfs, in order that we may reassemble. */ 1047 /* 1048 if nonzero structure of submatrix B cannot change then we know that 1049 no processor disassembled thus we can skip this stuff 1050 */ 1051 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1052 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1053 if (mat->was_assembled && !other_disassembled) { 1054 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1055 } 1056 } 1057 1058 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1059 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1060 } 1061 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1062 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1063 1064 #if defined(PETSC_USE_BOPT_g) 1065 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1066 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1067 baij->ht_total_ct = 0; 1068 baij->ht_insert_ct = 0; 1069 } 1070 #endif 1071 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1072 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1073 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1074 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1075 } 1076 1077 if (baij->rowvalues) { 1078 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1079 baij->rowvalues = 0; 1080 } 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1086 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1087 { 1088 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1089 PetscErrorCode ierr; 1090 PetscMPIInt size = baij->size,rank = baij->rank; 1091 PetscInt bs = baij->bs; 1092 PetscTruth iascii,isdraw; 1093 PetscViewer sviewer; 1094 PetscViewerFormat format; 1095 1096 PetscFunctionBegin; 1097 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1098 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1099 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1100 if (iascii) { 1101 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1102 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1103 MatInfo info; 1104 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1105 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1106 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1107 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1108 baij->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1109 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1110 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1111 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1112 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1113 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1114 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1117 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1120 PetscFunctionReturn(0); 1121 } 1122 } 1123 1124 if (isdraw) { 1125 PetscDraw draw; 1126 PetscTruth isnull; 1127 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1128 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1129 } 1130 1131 if (size == 1) { 1132 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1133 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1134 } else { 1135 /* assemble the entire matrix onto first processor. */ 1136 Mat A; 1137 Mat_SeqBAIJ *Aloc; 1138 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1139 MatScalar *a; 1140 1141 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1142 /* Perhaps this should be the type of mat? */ 1143 if (!rank) { 1144 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1145 } else { 1146 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1147 } 1148 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1149 ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1150 PetscLogObjectParent(mat,A); 1151 1152 /* copy over the A part */ 1153 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1154 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1155 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1156 1157 for (i=0; i<mbs; i++) { 1158 rvals[0] = bs*(baij->rstart + i); 1159 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1160 for (j=ai[i]; j<ai[i+1]; j++) { 1161 col = (baij->cstart+aj[j])*bs; 1162 for (k=0; k<bs; k++) { 1163 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1164 col++; a += bs; 1165 } 1166 } 1167 } 1168 /* copy over the B part */ 1169 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1170 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1171 for (i=0; i<mbs; i++) { 1172 rvals[0] = bs*(baij->rstart + i); 1173 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1174 for (j=ai[i]; j<ai[i+1]; j++) { 1175 col = baij->garray[aj[j]]*bs; 1176 for (k=0; k<bs; k++) { 1177 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1178 col++; a += bs; 1179 } 1180 } 1181 } 1182 ierr = PetscFree(rvals);CHKERRQ(ierr); 1183 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1184 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1185 /* 1186 Everyone has to call to draw the matrix since the graphics waits are 1187 synchronized across all processors that share the PetscDraw object 1188 */ 1189 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1190 if (!rank) { 1191 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1192 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1193 } 1194 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1195 ierr = MatDestroy(A);CHKERRQ(ierr); 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatView_MPIBAIJ" 1202 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1203 { 1204 PetscErrorCode ierr; 1205 PetscTruth iascii,isdraw,issocket,isbinary; 1206 1207 PetscFunctionBegin; 1208 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1209 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1210 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1211 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1212 if (iascii || isdraw || issocket || isbinary) { 1213 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1214 } else { 1215 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1222 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1223 { 1224 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1225 PetscErrorCode ierr; 1226 1227 PetscFunctionBegin; 1228 #if defined(PETSC_USE_LOG) 1229 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1230 #endif 1231 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1232 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1233 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1234 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1235 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1236 #if defined (PETSC_USE_CTABLE) 1237 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1238 #else 1239 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1240 #endif 1241 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1242 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1243 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1244 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1245 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1246 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1247 #if defined(PETSC_USE_MAT_SINGLE) 1248 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1249 #endif 1250 ierr = PetscFree(baij);CHKERRQ(ierr); 1251 1252 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1253 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1255 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1256 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1257 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatMult_MPIBAIJ" 1263 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1264 { 1265 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1266 PetscErrorCode ierr; 1267 PetscInt nt; 1268 1269 PetscFunctionBegin; 1270 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1271 if (nt != A->n) { 1272 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1273 } 1274 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1275 if (nt != A->m) { 1276 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1277 } 1278 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1279 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1280 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1281 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1282 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1288 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1289 { 1290 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1295 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1296 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1297 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1303 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1304 { 1305 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1306 PetscErrorCode ierr; 1307 1308 PetscFunctionBegin; 1309 /* do nondiagonal part */ 1310 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1311 /* send it on its way */ 1312 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1313 /* do local part */ 1314 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1315 /* receive remote parts: note this assumes the values are not actually */ 1316 /* inserted in yy until the next line, which is true for my implementation*/ 1317 /* but is not perhaps always true. */ 1318 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1324 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1325 { 1326 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 /* do nondiagonal part */ 1331 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1332 /* send it on its way */ 1333 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1334 /* do local part */ 1335 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1336 /* receive remote parts: note this assumes the values are not actually */ 1337 /* inserted in yy until the next line, which is true for my implementation*/ 1338 /* but is not perhaps always true. */ 1339 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 /* 1344 This only works correctly for square matrices where the subblock A->A is the 1345 diagonal block 1346 */ 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1349 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1350 { 1351 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1352 PetscErrorCode ierr; 1353 1354 PetscFunctionBegin; 1355 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1356 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatScale_MPIBAIJ" 1362 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1363 { 1364 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1365 PetscErrorCode ierr; 1366 1367 PetscFunctionBegin; 1368 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1369 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1375 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1376 { 1377 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1378 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1379 PetscErrorCode ierr; 1380 PetscInt bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1381 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1382 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1383 1384 PetscFunctionBegin; 1385 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1386 mat->getrowactive = PETSC_TRUE; 1387 1388 if (!mat->rowvalues && (idx || v)) { 1389 /* 1390 allocate enough space to hold information from the longest row. 1391 */ 1392 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1393 PetscInt max = 1,mbs = mat->mbs,tmp; 1394 for (i=0; i<mbs; i++) { 1395 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1396 if (max < tmp) { max = tmp; } 1397 } 1398 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1399 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1400 } 1401 1402 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1403 lrow = row - brstart; 1404 1405 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1406 if (!v) {pvA = 0; pvB = 0;} 1407 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1408 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1409 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1410 nztot = nzA + nzB; 1411 1412 cmap = mat->garray; 1413 if (v || idx) { 1414 if (nztot) { 1415 /* Sort by increasing column numbers, assuming A and B already sorted */ 1416 PetscInt imark = -1; 1417 if (v) { 1418 *v = v_p = mat->rowvalues; 1419 for (i=0; i<nzB; i++) { 1420 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1421 else break; 1422 } 1423 imark = i; 1424 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1425 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1426 } 1427 if (idx) { 1428 *idx = idx_p = mat->rowindices; 1429 if (imark > -1) { 1430 for (i=0; i<imark; i++) { 1431 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1432 } 1433 } else { 1434 for (i=0; i<nzB; i++) { 1435 if (cmap[cworkB[i]/bs] < cstart) 1436 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1437 else break; 1438 } 1439 imark = i; 1440 } 1441 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1442 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1443 } 1444 } else { 1445 if (idx) *idx = 0; 1446 if (v) *v = 0; 1447 } 1448 } 1449 *nz = nztot; 1450 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1451 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1457 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1458 { 1459 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1460 1461 PetscFunctionBegin; 1462 if (baij->getrowactive == PETSC_FALSE) { 1463 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1464 } 1465 baij->getrowactive = PETSC_FALSE; 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1471 PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,PetscInt *bs) 1472 { 1473 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1474 1475 PetscFunctionBegin; 1476 *bs = baij->bs; 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1482 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1483 { 1484 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 1487 PetscFunctionBegin; 1488 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1489 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1495 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1496 { 1497 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1498 Mat A = a->A,B = a->B; 1499 PetscErrorCode ierr; 1500 PetscReal isend[5],irecv[5]; 1501 1502 PetscFunctionBegin; 1503 info->block_size = (PetscReal)a->bs; 1504 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1505 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1506 isend[3] = info->memory; isend[4] = info->mallocs; 1507 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1508 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1509 isend[3] += info->memory; isend[4] += info->mallocs; 1510 if (flag == MAT_LOCAL) { 1511 info->nz_used = isend[0]; 1512 info->nz_allocated = isend[1]; 1513 info->nz_unneeded = isend[2]; 1514 info->memory = isend[3]; 1515 info->mallocs = isend[4]; 1516 } else if (flag == MAT_GLOBAL_MAX) { 1517 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1518 info->nz_used = irecv[0]; 1519 info->nz_allocated = irecv[1]; 1520 info->nz_unneeded = irecv[2]; 1521 info->memory = irecv[3]; 1522 info->mallocs = irecv[4]; 1523 } else if (flag == MAT_GLOBAL_SUM) { 1524 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1525 info->nz_used = irecv[0]; 1526 info->nz_allocated = irecv[1]; 1527 info->nz_unneeded = irecv[2]; 1528 info->memory = irecv[3]; 1529 info->mallocs = irecv[4]; 1530 } else { 1531 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1532 } 1533 info->rows_global = (PetscReal)A->M; 1534 info->columns_global = (PetscReal)A->N; 1535 info->rows_local = (PetscReal)A->m; 1536 info->columns_local = (PetscReal)A->N; 1537 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1538 info->fill_ratio_needed = 0; 1539 info->factor_mallocs = 0; 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1545 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1546 { 1547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1548 PetscErrorCode ierr; 1549 1550 PetscFunctionBegin; 1551 switch (op) { 1552 case MAT_NO_NEW_NONZERO_LOCATIONS: 1553 case MAT_YES_NEW_NONZERO_LOCATIONS: 1554 case MAT_COLUMNS_UNSORTED: 1555 case MAT_COLUMNS_SORTED: 1556 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1557 case MAT_KEEP_ZEROED_ROWS: 1558 case MAT_NEW_NONZERO_LOCATION_ERR: 1559 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1560 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1561 break; 1562 case MAT_ROW_ORIENTED: 1563 a->roworiented = PETSC_TRUE; 1564 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1565 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1566 break; 1567 case MAT_ROWS_SORTED: 1568 case MAT_ROWS_UNSORTED: 1569 case MAT_YES_NEW_DIAGONALS: 1570 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1571 break; 1572 case MAT_COLUMN_ORIENTED: 1573 a->roworiented = PETSC_FALSE; 1574 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1575 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1576 break; 1577 case MAT_IGNORE_OFF_PROC_ENTRIES: 1578 a->donotstash = PETSC_TRUE; 1579 break; 1580 case MAT_NO_NEW_DIAGONALS: 1581 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1582 case MAT_USE_HASH_TABLE: 1583 a->ht_flag = PETSC_TRUE; 1584 break; 1585 case MAT_SYMMETRIC: 1586 case MAT_STRUCTURALLY_SYMMETRIC: 1587 case MAT_HERMITIAN: 1588 case MAT_SYMMETRY_ETERNAL: 1589 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1590 break; 1591 case MAT_NOT_SYMMETRIC: 1592 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1593 case MAT_NOT_HERMITIAN: 1594 case MAT_NOT_SYMMETRY_ETERNAL: 1595 break; 1596 default: 1597 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1604 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1605 { 1606 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1607 Mat_SeqBAIJ *Aloc; 1608 Mat B; 1609 PetscErrorCode ierr; 1610 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1611 PetscInt bs=baij->bs,mbs=baij->mbs; 1612 MatScalar *a; 1613 1614 PetscFunctionBegin; 1615 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1616 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1617 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1618 ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1619 1620 /* copy over the A part */ 1621 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1622 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1623 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1624 1625 for (i=0; i<mbs; i++) { 1626 rvals[0] = bs*(baij->rstart + i); 1627 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1628 for (j=ai[i]; j<ai[i+1]; j++) { 1629 col = (baij->cstart+aj[j])*bs; 1630 for (k=0; k<bs; k++) { 1631 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1632 col++; a += bs; 1633 } 1634 } 1635 } 1636 /* copy over the B part */ 1637 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1638 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1639 for (i=0; i<mbs; i++) { 1640 rvals[0] = bs*(baij->rstart + i); 1641 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1642 for (j=ai[i]; j<ai[i+1]; j++) { 1643 col = baij->garray[aj[j]]*bs; 1644 for (k=0; k<bs; k++) { 1645 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1646 col++; a += bs; 1647 } 1648 } 1649 } 1650 ierr = PetscFree(rvals);CHKERRQ(ierr); 1651 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1652 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1653 1654 if (matout) { 1655 *matout = B; 1656 } else { 1657 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1658 } 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1664 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1665 { 1666 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1667 Mat a = baij->A,b = baij->B; 1668 PetscErrorCode ierr; 1669 PetscInt s1,s2,s3; 1670 1671 PetscFunctionBegin; 1672 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1673 if (rr) { 1674 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1675 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1676 /* Overlap communication with computation. */ 1677 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1678 } 1679 if (ll) { 1680 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1681 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1682 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1683 } 1684 /* scale the diagonal block */ 1685 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1686 1687 if (rr) { 1688 /* Do a scatter end and then right scale the off-diagonal block */ 1689 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1690 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1691 } 1692 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1698 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1699 { 1700 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1701 PetscErrorCode ierr; 1702 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1703 PetscInt i,N,*rows,*owners = l->rowners; 1704 PetscInt *nprocs,j,idx,nsends,row; 1705 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1706 PetscInt *rvalues,tag = A->tag,count,base,slen,*source; 1707 PetscInt *lens,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1708 MPI_Comm comm = A->comm; 1709 MPI_Request *send_waits,*recv_waits; 1710 MPI_Status recv_status,*send_status; 1711 IS istmp; 1712 PetscTruth found; 1713 1714 PetscFunctionBegin; 1715 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1716 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1717 1718 /* first count number of contributors to each processor */ 1719 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1720 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1721 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1722 for (i=0; i<N; i++) { 1723 idx = rows[i]; 1724 found = PETSC_FALSE; 1725 for (j=0; j<size; j++) { 1726 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1727 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1728 } 1729 } 1730 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1731 } 1732 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1733 1734 /* inform other processors of number of messages and max length*/ 1735 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1736 1737 /* post receives: */ 1738 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1739 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1740 for (i=0; i<nrecvs; i++) { 1741 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1742 } 1743 1744 /* do sends: 1745 1) starts[i] gives the starting index in svalues for stuff going to 1746 the ith processor 1747 */ 1748 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1749 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1750 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1751 starts[0] = 0; 1752 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1753 for (i=0; i<N; i++) { 1754 svalues[starts[owner[i]]++] = rows[i]; 1755 } 1756 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1757 1758 starts[0] = 0; 1759 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1760 count = 0; 1761 for (i=0; i<size; i++) { 1762 if (nprocs[2*i+1]) { 1763 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1764 } 1765 } 1766 ierr = PetscFree(starts);CHKERRQ(ierr); 1767 1768 base = owners[rank]*bs; 1769 1770 /* wait on receives */ 1771 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1772 source = lens + nrecvs; 1773 count = nrecvs; slen = 0; 1774 while (count) { 1775 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1776 /* unpack receives into our local space */ 1777 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1778 source[imdex] = recv_status.MPI_SOURCE; 1779 lens[imdex] = n; 1780 slen += n; 1781 count--; 1782 } 1783 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1784 1785 /* move the data into the send scatter */ 1786 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1787 count = 0; 1788 for (i=0; i<nrecvs; i++) { 1789 values = rvalues + i*nmax; 1790 for (j=0; j<lens[i]; j++) { 1791 lrows[count++] = values[j] - base; 1792 } 1793 } 1794 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1795 ierr = PetscFree(lens);CHKERRQ(ierr); 1796 ierr = PetscFree(owner);CHKERRQ(ierr); 1797 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1798 1799 /* actually zap the local rows */ 1800 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1801 PetscLogObjectParent(A,istmp); 1802 1803 /* 1804 Zero the required rows. If the "diagonal block" of the matrix 1805 is square and the user wishes to set the diagonal we use seperate 1806 code so that MatSetValues() is not called for each diagonal allocating 1807 new memory, thus calling lots of mallocs and slowing things down. 1808 1809 Contributed by: Mathew Knepley 1810 */ 1811 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1812 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1813 if (diag && (l->A->M == l->A->N)) { 1814 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1815 } else if (diag) { 1816 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1817 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1818 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1819 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1820 } 1821 for (i=0; i<slen; i++) { 1822 row = lrows[i] + rstart_bs; 1823 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1824 } 1825 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1826 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1827 } else { 1828 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1829 } 1830 1831 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1832 ierr = PetscFree(lrows);CHKERRQ(ierr); 1833 1834 /* wait on sends */ 1835 if (nsends) { 1836 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1837 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1838 ierr = PetscFree(send_status);CHKERRQ(ierr); 1839 } 1840 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1841 ierr = PetscFree(svalues);CHKERRQ(ierr); 1842 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1848 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1849 { 1850 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1851 MPI_Comm comm = A->comm; 1852 static PetscTruth called = PETSC_FALSE; 1853 PetscErrorCode ierr; 1854 1855 PetscFunctionBegin; 1856 if (!a->rank) { 1857 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1858 } 1859 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1860 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1861 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1867 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1868 { 1869 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1870 PetscErrorCode ierr; 1871 1872 PetscFunctionBegin; 1873 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatEqual_MPIBAIJ" 1881 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1882 { 1883 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1884 Mat a,b,c,d; 1885 PetscTruth flg; 1886 PetscErrorCode ierr; 1887 1888 PetscFunctionBegin; 1889 a = matA->A; b = matA->B; 1890 c = matB->A; d = matB->B; 1891 1892 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1893 if (flg == PETSC_TRUE) { 1894 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1895 } 1896 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1903 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1904 { 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /* -------------------------------------------------------------------*/ 1913 static struct _MatOps MatOps_Values = { 1914 MatSetValues_MPIBAIJ, 1915 MatGetRow_MPIBAIJ, 1916 MatRestoreRow_MPIBAIJ, 1917 MatMult_MPIBAIJ, 1918 /* 4*/ MatMultAdd_MPIBAIJ, 1919 MatMultTranspose_MPIBAIJ, 1920 MatMultTransposeAdd_MPIBAIJ, 1921 0, 1922 0, 1923 0, 1924 /*10*/ 0, 1925 0, 1926 0, 1927 0, 1928 MatTranspose_MPIBAIJ, 1929 /*15*/ MatGetInfo_MPIBAIJ, 1930 MatEqual_MPIBAIJ, 1931 MatGetDiagonal_MPIBAIJ, 1932 MatDiagonalScale_MPIBAIJ, 1933 MatNorm_MPIBAIJ, 1934 /*20*/ MatAssemblyBegin_MPIBAIJ, 1935 MatAssemblyEnd_MPIBAIJ, 1936 0, 1937 MatSetOption_MPIBAIJ, 1938 MatZeroEntries_MPIBAIJ, 1939 /*25*/ MatZeroRows_MPIBAIJ, 1940 0, 1941 0, 1942 0, 1943 0, 1944 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1945 0, 1946 0, 1947 0, 1948 0, 1949 /*35*/ MatDuplicate_MPIBAIJ, 1950 0, 1951 0, 1952 0, 1953 0, 1954 /*40*/ 0, 1955 MatGetSubMatrices_MPIBAIJ, 1956 MatIncreaseOverlap_MPIBAIJ, 1957 MatGetValues_MPIBAIJ, 1958 0, 1959 /*45*/ MatPrintHelp_MPIBAIJ, 1960 MatScale_MPIBAIJ, 1961 0, 1962 0, 1963 0, 1964 /*50*/ MatGetBlockSize_MPIBAIJ, 1965 0, 1966 0, 1967 0, 1968 0, 1969 /*55*/ 0, 1970 0, 1971 MatSetUnfactored_MPIBAIJ, 1972 0, 1973 MatSetValuesBlocked_MPIBAIJ, 1974 /*60*/ 0, 1975 MatDestroy_MPIBAIJ, 1976 MatView_MPIBAIJ, 1977 MatGetPetscMaps_Petsc, 1978 0, 1979 /*65*/ 0, 1980 0, 1981 0, 1982 0, 1983 0, 1984 /*70*/ MatGetRowMax_MPIBAIJ, 1985 0, 1986 0, 1987 0, 1988 0, 1989 /*75*/ 0, 1990 0, 1991 0, 1992 0, 1993 0, 1994 /*80*/ 0, 1995 0, 1996 0, 1997 0, 1998 MatLoad_MPIBAIJ, 1999 /*85*/ 0, 2000 0, 2001 0, 2002 0, 2003 0, 2004 /*90*/ 0, 2005 0, 2006 0, 2007 0, 2008 0, 2009 /*95*/ 0, 2010 0, 2011 0, 2012 0}; 2013 2014 2015 EXTERN_C_BEGIN 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2018 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2019 { 2020 PetscFunctionBegin; 2021 *a = ((Mat_MPIBAIJ *)A->data)->A; 2022 *iscopy = PETSC_FALSE; 2023 PetscFunctionReturn(0); 2024 } 2025 EXTERN_C_END 2026 2027 EXTERN_C_BEGIN 2028 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 2029 EXTERN_C_END 2030 2031 EXTERN_C_BEGIN 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2034 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2035 { 2036 Mat_MPIBAIJ *b; 2037 PetscErrorCode ierr; 2038 PetscInt i; 2039 2040 PetscFunctionBegin; 2041 B->preallocated = PETSC_TRUE; 2042 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2043 2044 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2045 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2046 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2047 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2048 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2049 if (d_nnz) { 2050 for (i=0; i<B->m/bs; i++) { 2051 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2052 } 2053 } 2054 if (o_nnz) { 2055 for (i=0; i<B->m/bs; i++) { 2056 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2057 } 2058 } 2059 2060 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2061 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2062 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2063 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2064 2065 b = (Mat_MPIBAIJ*)B->data; 2066 b->bs = bs; 2067 b->bs2 = bs*bs; 2068 b->mbs = B->m/bs; 2069 b->nbs = B->n/bs; 2070 b->Mbs = B->M/bs; 2071 b->Nbs = B->N/bs; 2072 2073 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2074 b->rowners[0] = 0; 2075 for (i=2; i<=b->size; i++) { 2076 b->rowners[i] += b->rowners[i-1]; 2077 } 2078 b->rstart = b->rowners[b->rank]; 2079 b->rend = b->rowners[b->rank+1]; 2080 2081 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2082 b->cowners[0] = 0; 2083 for (i=2; i<=b->size; i++) { 2084 b->cowners[i] += b->cowners[i-1]; 2085 } 2086 b->cstart = b->cowners[b->rank]; 2087 b->cend = b->cowners[b->rank+1]; 2088 2089 for (i=0; i<=b->size; i++) { 2090 b->rowners_bs[i] = b->rowners[i]*bs; 2091 } 2092 b->rstart_bs = b->rstart*bs; 2093 b->rend_bs = b->rend*bs; 2094 b->cstart_bs = b->cstart*bs; 2095 b->cend_bs = b->cend*bs; 2096 2097 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2098 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2099 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2100 PetscLogObjectParent(B,b->A); 2101 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2102 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2103 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2104 PetscLogObjectParent(B,b->B); 2105 2106 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2107 2108 PetscFunctionReturn(0); 2109 } 2110 EXTERN_C_END 2111 2112 EXTERN_C_BEGIN 2113 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2114 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2115 EXTERN_C_END 2116 2117 /*MC 2118 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2119 2120 Options Database Keys: 2121 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2122 2123 Level: beginner 2124 2125 .seealso: MatCreateMPIBAIJ 2126 M*/ 2127 2128 EXTERN_C_BEGIN 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatCreate_MPIBAIJ" 2131 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2132 { 2133 Mat_MPIBAIJ *b; 2134 PetscErrorCode ierr; 2135 PetscTruth flg; 2136 2137 PetscFunctionBegin; 2138 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2139 B->data = (void*)b; 2140 2141 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2142 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2143 B->mapping = 0; 2144 B->factor = 0; 2145 B->assembled = PETSC_FALSE; 2146 2147 B->insertmode = NOT_SET_VALUES; 2148 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2149 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2150 2151 /* build local table of row and column ownerships */ 2152 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2153 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2154 b->cowners = b->rowners + b->size + 2; 2155 b->rowners_bs = b->cowners + b->size + 2; 2156 2157 /* build cache for off array entries formed */ 2158 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2159 b->donotstash = PETSC_FALSE; 2160 b->colmap = PETSC_NULL; 2161 b->garray = PETSC_NULL; 2162 b->roworiented = PETSC_TRUE; 2163 2164 #if defined(PETSC_USE_MAT_SINGLE) 2165 /* stuff for MatSetValues_XXX in single precision */ 2166 b->setvalueslen = 0; 2167 b->setvaluescopy = PETSC_NULL; 2168 #endif 2169 2170 /* stuff used in block assembly */ 2171 b->barray = 0; 2172 2173 /* stuff used for matrix vector multiply */ 2174 b->lvec = 0; 2175 b->Mvctx = 0; 2176 2177 /* stuff for MatGetRow() */ 2178 b->rowindices = 0; 2179 b->rowvalues = 0; 2180 b->getrowactive = PETSC_FALSE; 2181 2182 /* hash table stuff */ 2183 b->ht = 0; 2184 b->hd = 0; 2185 b->ht_size = 0; 2186 b->ht_flag = PETSC_FALSE; 2187 b->ht_fact = 0; 2188 b->ht_total_ct = 0; 2189 b->ht_insert_ct = 0; 2190 2191 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2192 if (flg) { 2193 PetscReal fact = 1.39; 2194 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2195 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2196 if (fact <= 1.0) fact = 1.39; 2197 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2198 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2199 } 2200 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2201 "MatStoreValues_MPIBAIJ", 2202 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2203 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2204 "MatRetrieveValues_MPIBAIJ", 2205 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2206 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2207 "MatGetDiagonalBlock_MPIBAIJ", 2208 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2209 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2210 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2211 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2212 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2213 "MatDiagonalScaleLocal_MPIBAIJ", 2214 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2215 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2216 "MatSetHashTableFactor_MPIBAIJ", 2217 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 EXTERN_C_END 2221 2222 /*MC 2223 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2224 2225 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2226 and MATMPIBAIJ otherwise. 2227 2228 Options Database Keys: 2229 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2230 2231 Level: beginner 2232 2233 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ 2234 M*/ 2235 2236 EXTERN_C_BEGIN 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatCreate_BAIJ" 2239 PetscErrorCode MatCreate_BAIJ(Mat A) 2240 { 2241 PetscErrorCode ierr; 2242 PetscMPIInt size; 2243 2244 PetscFunctionBegin; 2245 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2246 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2247 if (size == 1) { 2248 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2249 } else { 2250 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 EXTERN_C_END 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2258 /*@C 2259 MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2260 (block compressed row). For good matrix assembly performance 2261 the user should preallocate the matrix storage by setting the parameters 2262 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2263 performance can be increased by more than a factor of 50. 2264 2265 Collective on Mat 2266 2267 Input Parameters: 2268 + A - the matrix 2269 . bs - size of blockk 2270 . d_nz - number of block nonzeros per block row in diagonal portion of local 2271 submatrix (same for all local rows) 2272 . d_nnz - array containing the number of block nonzeros in the various block rows 2273 of the in diagonal portion of the local (possibly different for each block 2274 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2275 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2276 submatrix (same for all local rows). 2277 - o_nnz - array containing the number of nonzeros in the various block rows of the 2278 off-diagonal portion of the local submatrix (possibly different for 2279 each block row) or PETSC_NULL. 2280 2281 Output Parameter: 2282 2283 2284 Options Database Keys: 2285 . -mat_no_unroll - uses code that does not unroll the loops in the 2286 block calculations (much slower) 2287 . -mat_block_size - size of the blocks to use 2288 2289 Notes: 2290 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2291 than it must be used on all processors that share the object for that argument. 2292 2293 Storage Information: 2294 For a square global matrix we define each processor's diagonal portion 2295 to be its local rows and the corresponding columns (a square submatrix); 2296 each processor's off-diagonal portion encompasses the remainder of the 2297 local matrix (a rectangular submatrix). 2298 2299 The user can specify preallocated storage for the diagonal part of 2300 the local submatrix with either d_nz or d_nnz (not both). Set 2301 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2302 memory allocation. Likewise, specify preallocated storage for the 2303 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2304 2305 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2306 the figure below we depict these three local rows and all columns (0-11). 2307 2308 .vb 2309 0 1 2 3 4 5 6 7 8 9 10 11 2310 ------------------- 2311 row 3 | o o o d d d o o o o o o 2312 row 4 | o o o d d d o o o o o o 2313 row 5 | o o o d d d o o o o o o 2314 ------------------- 2315 .ve 2316 2317 Thus, any entries in the d locations are stored in the d (diagonal) 2318 submatrix, and any entries in the o locations are stored in the 2319 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2320 stored simply in the MATSEQBAIJ format for compressed row storage. 2321 2322 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2323 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2324 In general, for PDE problems in which most nonzeros are near the diagonal, 2325 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2326 or you will get TERRIBLE performance; see the users' manual chapter on 2327 matrices. 2328 2329 Level: intermediate 2330 2331 .keywords: matrix, block, aij, compressed row, sparse, parallel 2332 2333 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2334 @*/ 2335 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2336 { 2337 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2338 2339 PetscFunctionBegin; 2340 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2341 if (f) { 2342 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2343 } 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatCreateMPIBAIJ" 2349 /*@C 2350 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2351 (block compressed row). For good matrix assembly performance 2352 the user should preallocate the matrix storage by setting the parameters 2353 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2354 performance can be increased by more than a factor of 50. 2355 2356 Collective on MPI_Comm 2357 2358 Input Parameters: 2359 + comm - MPI communicator 2360 . bs - size of blockk 2361 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2362 This value should be the same as the local size used in creating the 2363 y vector for the matrix-vector product y = Ax. 2364 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2365 This value should be the same as the local size used in creating the 2366 x vector for the matrix-vector product y = Ax. 2367 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2368 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2369 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2370 submatrix (same for all local rows) 2371 . d_nnz - array containing the number of nonzero blocks in the various block rows 2372 of the in diagonal portion of the local (possibly different for each block 2373 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2374 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2375 submatrix (same for all local rows). 2376 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2377 off-diagonal portion of the local submatrix (possibly different for 2378 each block row) or PETSC_NULL. 2379 2380 Output Parameter: 2381 . A - the matrix 2382 2383 Options Database Keys: 2384 . -mat_no_unroll - uses code that does not unroll the loops in the 2385 block calculations (much slower) 2386 . -mat_block_size - size of the blocks to use 2387 2388 Notes: 2389 A nonzero block is any block that as 1 or more nonzeros in it 2390 2391 The user MUST specify either the local or global matrix dimensions 2392 (possibly both). 2393 2394 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2395 than it must be used on all processors that share the object for that argument. 2396 2397 Storage Information: 2398 For a square global matrix we define each processor's diagonal portion 2399 to be its local rows and the corresponding columns (a square submatrix); 2400 each processor's off-diagonal portion encompasses the remainder of the 2401 local matrix (a rectangular submatrix). 2402 2403 The user can specify preallocated storage for the diagonal part of 2404 the local submatrix with either d_nz or d_nnz (not both). Set 2405 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2406 memory allocation. Likewise, specify preallocated storage for the 2407 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2408 2409 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2410 the figure below we depict these three local rows and all columns (0-11). 2411 2412 .vb 2413 0 1 2 3 4 5 6 7 8 9 10 11 2414 ------------------- 2415 row 3 | o o o d d d o o o o o o 2416 row 4 | o o o d d d o o o o o o 2417 row 5 | o o o d d d o o o o o o 2418 ------------------- 2419 .ve 2420 2421 Thus, any entries in the d locations are stored in the d (diagonal) 2422 submatrix, and any entries in the o locations are stored in the 2423 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2424 stored simply in the MATSEQBAIJ format for compressed row storage. 2425 2426 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2427 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2428 In general, for PDE problems in which most nonzeros are near the diagonal, 2429 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2430 or you will get TERRIBLE performance; see the users' manual chapter on 2431 matrices. 2432 2433 Level: intermediate 2434 2435 .keywords: matrix, block, aij, compressed row, sparse, parallel 2436 2437 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2438 @*/ 2439 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2440 { 2441 PetscErrorCode ierr; 2442 PetscMPIInt size; 2443 2444 PetscFunctionBegin; 2445 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2446 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2447 if (size > 1) { 2448 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2449 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2450 } else { 2451 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2452 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2453 } 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2459 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2460 { 2461 Mat mat; 2462 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2463 PetscErrorCode ierr; 2464 PetscInt len=0; 2465 2466 PetscFunctionBegin; 2467 *newmat = 0; 2468 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2469 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2470 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2471 2472 mat->factor = matin->factor; 2473 mat->preallocated = PETSC_TRUE; 2474 mat->assembled = PETSC_TRUE; 2475 mat->insertmode = NOT_SET_VALUES; 2476 2477 a = (Mat_MPIBAIJ*)mat->data; 2478 a->bs = oldmat->bs; 2479 a->bs2 = oldmat->bs2; 2480 a->mbs = oldmat->mbs; 2481 a->nbs = oldmat->nbs; 2482 a->Mbs = oldmat->Mbs; 2483 a->Nbs = oldmat->Nbs; 2484 2485 a->rstart = oldmat->rstart; 2486 a->rend = oldmat->rend; 2487 a->cstart = oldmat->cstart; 2488 a->cend = oldmat->cend; 2489 a->size = oldmat->size; 2490 a->rank = oldmat->rank; 2491 a->donotstash = oldmat->donotstash; 2492 a->roworiented = oldmat->roworiented; 2493 a->rowindices = 0; 2494 a->rowvalues = 0; 2495 a->getrowactive = PETSC_FALSE; 2496 a->barray = 0; 2497 a->rstart_bs = oldmat->rstart_bs; 2498 a->rend_bs = oldmat->rend_bs; 2499 a->cstart_bs = oldmat->cstart_bs; 2500 a->cend_bs = oldmat->cend_bs; 2501 2502 /* hash table stuff */ 2503 a->ht = 0; 2504 a->hd = 0; 2505 a->ht_size = 0; 2506 a->ht_flag = oldmat->ht_flag; 2507 a->ht_fact = oldmat->ht_fact; 2508 a->ht_total_ct = 0; 2509 a->ht_insert_ct = 0; 2510 2511 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2512 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2513 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2514 if (oldmat->colmap) { 2515 #if defined (PETSC_USE_CTABLE) 2516 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2517 #else 2518 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2519 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt)); 2520 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2521 #endif 2522 } else a->colmap = 0; 2523 2524 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2525 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2526 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 2527 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2528 } else a->garray = 0; 2529 2530 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2531 PetscLogObjectParent(mat,a->lvec); 2532 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2533 PetscLogObjectParent(mat,a->Mvctx); 2534 2535 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2536 PetscLogObjectParent(mat,a->A); 2537 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2538 PetscLogObjectParent(mat,a->B); 2539 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2540 *newmat = mat; 2541 2542 PetscFunctionReturn(0); 2543 } 2544 2545 #include "petscsys.h" 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatLoad_MPIBAIJ" 2549 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2550 { 2551 Mat A; 2552 PetscErrorCode ierr; 2553 int fd; 2554 PetscInt i,nz,j,rstart,rend; 2555 PetscScalar *vals,*buf; 2556 MPI_Comm comm = ((PetscObject)viewer)->comm; 2557 MPI_Status status; 2558 PetscMPIInt rank,size,maxnz; 2559 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2560 PetscMPIInt *locrowlens,*sndcounts = 0,*procsnz = 0,*browners; 2561 PetscInt jj,*mycols,*ibuf; 2562 PetscInt tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2563 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2564 PetscInt dcount,kmax,k,nzcount,tmp; 2565 2566 PetscFunctionBegin; 2567 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2568 2569 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2570 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2571 if (!rank) { 2572 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2573 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2574 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2575 if (header[3] < 0) { 2576 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2577 } 2578 } 2579 2580 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2581 M = header[1]; N = header[2]; 2582 2583 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2584 2585 /* 2586 This code adds extra rows to make sure the number of rows is 2587 divisible by the blocksize 2588 */ 2589 Mbs = M/bs; 2590 extra_rows = bs - M + bs*(Mbs); 2591 if (extra_rows == bs) extra_rows = 0; 2592 else Mbs++; 2593 if (extra_rows &&!rank) { 2594 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2595 } 2596 2597 /* determine ownership of all rows */ 2598 mbs = Mbs/size + ((Mbs % size) > rank); 2599 m = mbs*bs; 2600 ierr = PetscMalloc(2*(size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2601 browners = (PetscMPIInt*)rowners + size + 1; 2602 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2603 rowners[0] = 0; 2604 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2605 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2606 rstart = rowners[rank]; 2607 rend = rowners[rank+1]; 2608 2609 /* distribute row lengths to all processors */ 2610 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2611 if (!rank) { 2612 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2613 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2614 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2615 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2616 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2617 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2618 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2619 } else { 2620 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2621 } 2622 2623 if (!rank) { 2624 /* calculate the number of nonzeros on each processor */ 2625 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2626 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2627 for (i=0; i<size; i++) { 2628 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2629 procsnz[i] += rowlengths[j]; 2630 } 2631 } 2632 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2633 2634 /* determine max buffer needed and allocate it */ 2635 maxnz = 0; 2636 for (i=0; i<size; i++) { 2637 maxnz = PetscMax(maxnz,procsnz[i]); 2638 } 2639 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2640 2641 /* read in my part of the matrix column indices */ 2642 nz = procsnz[0]; 2643 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2644 mycols = ibuf; 2645 if (size == 1) nz -= extra_rows; 2646 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2647 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2648 2649 /* read in every ones (except the last) and ship off */ 2650 for (i=1; i<size-1; i++) { 2651 nz = procsnz[i]; 2652 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2653 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2654 } 2655 /* read in the stuff for the last proc */ 2656 if (size != 1) { 2657 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2658 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2659 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2660 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2661 } 2662 ierr = PetscFree(cols);CHKERRQ(ierr); 2663 } else { 2664 /* determine buffer space needed for message */ 2665 nz = 0; 2666 for (i=0; i<m; i++) { 2667 nz += locrowlens[i]; 2668 } 2669 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2670 mycols = ibuf; 2671 /* receive message of column indices*/ 2672 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2673 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2674 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2675 } 2676 2677 /* loop over local rows, determining number of off diagonal entries */ 2678 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2679 odlens = dlens + (rend-rstart); 2680 ierr = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2681 ierr = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2682 masked1 = mask + Mbs; 2683 masked2 = masked1 + Mbs; 2684 rowcount = 0; nzcount = 0; 2685 for (i=0; i<mbs; i++) { 2686 dcount = 0; 2687 odcount = 0; 2688 for (j=0; j<bs; j++) { 2689 kmax = locrowlens[rowcount]; 2690 for (k=0; k<kmax; k++) { 2691 tmp = mycols[nzcount++]/bs; 2692 if (!mask[tmp]) { 2693 mask[tmp] = 1; 2694 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2695 else masked1[dcount++] = tmp; 2696 } 2697 } 2698 rowcount++; 2699 } 2700 2701 dlens[i] = dcount; 2702 odlens[i] = odcount; 2703 2704 /* zero out the mask elements we set */ 2705 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2706 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2707 } 2708 2709 /* create our matrix */ 2710 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2711 ierr = MatSetType(A,type);CHKERRQ(ierr) 2712 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2713 2714 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2715 MatSetOption(A,MAT_COLUMNS_SORTED); 2716 2717 if (!rank) { 2718 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2719 /* read in my part of the matrix numerical values */ 2720 nz = procsnz[0]; 2721 vals = buf; 2722 mycols = ibuf; 2723 if (size == 1) nz -= extra_rows; 2724 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2725 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2726 2727 /* insert into matrix */ 2728 jj = rstart*bs; 2729 for (i=0; i<m; i++) { 2730 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2731 mycols += locrowlens[i]; 2732 vals += locrowlens[i]; 2733 jj++; 2734 } 2735 /* read in other processors (except the last one) and ship out */ 2736 for (i=1; i<size-1; i++) { 2737 nz = procsnz[i]; 2738 vals = buf; 2739 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2740 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2741 } 2742 /* the last proc */ 2743 if (size != 1){ 2744 nz = procsnz[i] - extra_rows; 2745 vals = buf; 2746 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2747 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2748 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2749 } 2750 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2751 } else { 2752 /* receive numeric values */ 2753 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2754 2755 /* receive message of values*/ 2756 vals = buf; 2757 mycols = ibuf; 2758 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2759 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2760 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2761 2762 /* insert into matrix */ 2763 jj = rstart*bs; 2764 for (i=0; i<m; i++) { 2765 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2766 mycols += locrowlens[i]; 2767 vals += locrowlens[i]; 2768 jj++; 2769 } 2770 } 2771 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2772 ierr = PetscFree(buf);CHKERRQ(ierr); 2773 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2774 ierr = PetscFree(rowners);CHKERRQ(ierr); 2775 ierr = PetscFree(dlens);CHKERRQ(ierr); 2776 ierr = PetscFree(mask);CHKERRQ(ierr); 2777 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2778 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2779 2780 *newmat = A; 2781 PetscFunctionReturn(0); 2782 } 2783 2784 #undef __FUNCT__ 2785 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2786 /*@ 2787 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2788 2789 Input Parameters: 2790 . mat - the matrix 2791 . fact - factor 2792 2793 Collective on Mat 2794 2795 Level: advanced 2796 2797 Notes: 2798 This can also be set by the command line option: -mat_use_hash_table fact 2799 2800 .keywords: matrix, hashtable, factor, HT 2801 2802 .seealso: MatSetOption() 2803 @*/ 2804 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2805 { 2806 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2807 2808 PetscFunctionBegin; 2809 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2810 if (f) { 2811 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2812 } 2813 PetscFunctionReturn(0); 2814 } 2815 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2818 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2819 { 2820 Mat_MPIBAIJ *baij; 2821 2822 PetscFunctionBegin; 2823 baij = (Mat_MPIBAIJ*)mat->data; 2824 baij->ht_fact = fact; 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2830 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2831 { 2832 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2833 PetscFunctionBegin; 2834 *Ad = a->A; 2835 *Ao = a->B; 2836 *colmap = a->garray; 2837 PetscFunctionReturn(0); 2838 } 2839