1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 PetscErrorCode ierr; 110 PetscInt nbs = B->nbs,i,bs=mat->bs; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120 PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatSetValues_MPIBAIJ" 285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt i,N = m*n; 290 MatScalar *vsingle; 291 292 PetscFunctionBegin; 293 if (N > b->setvalueslen) { 294 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 295 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 296 b->setvalueslen = N; 297 } 298 vsingle = b->setvaluescopy; 299 300 for (i=0; i<N; i++) { 301 vsingle[i] = v[i]; 302 } 303 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 310 { 311 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 312 PetscErrorCode ierr; 313 PetscInt i,N = m*n*b->bs2; 314 MatScalar *vsingle; 315 316 PetscFunctionBegin; 317 if (N > b->setvalueslen) { 318 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 319 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 320 b->setvalueslen = N; 321 } 322 vsingle = b->setvaluescopy; 323 for (i=0; i<N; i++) { 324 vsingle[i] = v[i]; 325 } 326 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 333 { 334 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 335 PetscErrorCode ierr; 336 PetscInt i,N = m*n; 337 MatScalar *vsingle; 338 339 PetscFunctionBegin; 340 if (N > b->setvalueslen) { 341 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 342 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 343 b->setvalueslen = N; 344 } 345 vsingle = b->setvaluescopy; 346 for (i=0; i<N; i++) { 347 vsingle[i] = v[i]; 348 } 349 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 356 { 357 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 358 PetscErrorCode ierr; 359 PetscInt i,N = m*n*b->bs2; 360 MatScalar *vsingle; 361 362 PetscFunctionBegin; 363 if (N > b->setvalueslen) { 364 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 365 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 366 b->setvalueslen = N; 367 } 368 vsingle = b->setvaluescopy; 369 for (i=0; i<N; i++) { 370 vsingle[i] = v[i]; 371 } 372 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 #endif 376 377 #undef __FUNCT__ 378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 380 { 381 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 382 MatScalar value; 383 PetscTruth roworiented = baij->roworiented; 384 PetscErrorCode ierr; 385 PetscInt i,j,row,col; 386 PetscInt rstart_orig=baij->rstart_bs; 387 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 388 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 389 390 /* Some Variables required in the macro */ 391 Mat A = baij->A; 392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 393 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 394 MatScalar *aa=a->a; 395 396 Mat B = baij->B; 397 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 398 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 399 MatScalar *ba=b->a; 400 401 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 402 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 403 MatScalar *ap,*bap; 404 405 PetscFunctionBegin; 406 for (i=0; i<m; i++) { 407 if (im[i] < 0) continue; 408 #if defined(PETSC_USE_BOPT_g) 409 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 410 #endif 411 if (im[i] >= rstart_orig && im[i] < rend_orig) { 412 row = im[i] - rstart_orig; 413 for (j=0; j<n; j++) { 414 if (in[j] >= cstart_orig && in[j] < cend_orig){ 415 col = in[j] - cstart_orig; 416 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 417 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 418 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 419 } else if (in[j] < 0) continue; 420 #if defined(PETSC_USE_BOPT_g) 421 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 422 #endif 423 else { 424 if (mat->was_assembled) { 425 if (!baij->colmap) { 426 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 427 } 428 #if defined (PETSC_USE_CTABLE) 429 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 430 col = col - 1; 431 #else 432 col = baij->colmap[in[j]/bs] - 1; 433 #endif 434 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 435 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 436 col = in[j]; 437 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 438 B = baij->B; 439 b = (Mat_SeqBAIJ*)(B)->data; 440 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 441 ba=b->a; 442 } else col += in[j]%bs; 443 } else col = in[j]; 444 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 445 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 446 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 447 } 448 } 449 } else { 450 if (!baij->donotstash) { 451 if (roworiented) { 452 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453 } else { 454 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 455 } 456 } 457 } 458 } 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 const MatScalar *value; 468 MatScalar *barray=baij->barray; 469 PetscTruth roworiented = baij->roworiented; 470 PetscErrorCode ierr; 471 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 472 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 473 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 474 475 PetscFunctionBegin; 476 if(!barray) { 477 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 478 baij->barray = barray; 479 } 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 if (im[i] < 0) continue; 488 #if defined(PETSC_USE_BOPT_g) 489 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 if (im[i] >= rstart && im[i] < rend) { 492 row = im[i] - rstart; 493 for (j=0; j<n; j++) { 494 /* If NumCol = 1 then a copy is not required */ 495 if ((roworiented) && (n == 1)) { 496 barray = (MatScalar*)v + i*bs2; 497 } else if((!roworiented) && (m == 1)) { 498 barray = (MatScalar*)v + j*bs2; 499 } else { /* Here a copy is required */ 500 if (roworiented) { 501 value = v + i*(stepval+bs)*bs + j*bs; 502 } else { 503 value = v + j*(stepval+bs)*bs + i*bs; 504 } 505 for (ii=0; ii<bs; ii++,value+=stepval) { 506 for (jj=0; jj<bs; jj++) { 507 *barray++ = *value++; 508 } 509 } 510 barray -=bs2; 511 } 512 513 if (in[j] >= cstart && in[j] < cend){ 514 col = in[j] - cstart; 515 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 else if (in[j] < 0) continue; 518 #if defined(PETSC_USE_BOPT_g) 519 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 520 #endif 521 else { 522 if (mat->was_assembled) { 523 if (!baij->colmap) { 524 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 525 } 526 527 #if defined(PETSC_USE_BOPT_g) 528 #if defined (PETSC_USE_CTABLE) 529 { PetscInt data; 530 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 531 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 532 } 533 #else 534 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 535 #endif 536 #endif 537 #if defined (PETSC_USE_CTABLE) 538 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 539 col = (col - 1)/bs; 540 #else 541 col = (baij->colmap[in[j]] - 1)/bs; 542 #endif 543 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 544 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 545 col = in[j]; 546 } 547 } 548 else col = in[j]; 549 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 550 } 551 } 552 } else { 553 if (!baij->donotstash) { 554 if (roworiented) { 555 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556 } else { 557 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #define HASH_KEY 0.6180339887 566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 572 { 573 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 574 PetscTruth roworiented = baij->roworiented; 575 PetscErrorCode ierr; 576 PetscInt i,j,row,col; 577 PetscInt rstart_orig=baij->rstart_bs; 578 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 579 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 580 PetscReal tmp; 581 MatScalar **HD = baij->hd,value; 582 #if defined(PETSC_USE_BOPT_g) 583 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 584 #endif 585 586 PetscFunctionBegin; 587 588 for (i=0; i<m; i++) { 589 #if defined(PETSC_USE_BOPT_g) 590 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 591 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 592 #endif 593 row = im[i]; 594 if (row >= rstart_orig && row < rend_orig) { 595 for (j=0; j<n; j++) { 596 col = in[j]; 597 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 598 /* Look up PetscInto the Hash Table */ 599 key = (row/bs)*Nbs+(col/bs)+1; 600 h1 = HASH(size,key,tmp); 601 602 603 idx = h1; 604 #if defined(PETSC_USE_BOPT_g) 605 insert_ct++; 606 total_ct++; 607 if (HT[idx] != key) { 608 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 609 if (idx == size) { 610 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 611 if (idx == h1) { 612 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 613 } 614 } 615 } 616 #else 617 if (HT[idx] != key) { 618 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 619 if (idx == size) { 620 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 621 if (idx == h1) { 622 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623 } 624 } 625 } 626 #endif 627 /* A HASH table entry is found, so insert the values at the correct address */ 628 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 629 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 630 } 631 } else { 632 if (!baij->donotstash) { 633 if (roworiented) { 634 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 635 } else { 636 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 637 } 638 } 639 } 640 } 641 #if defined(PETSC_USE_BOPT_g) 642 baij->ht_total_ct = total_ct; 643 baij->ht_insert_ct = insert_ct; 644 #endif 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 651 { 652 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 653 PetscTruth roworiented = baij->roworiented; 654 PetscErrorCode ierr; 655 PetscInt i,j,ii,jj,row,col; 656 PetscInt rstart=baij->rstart ; 657 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 658 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659 PetscReal tmp; 660 MatScalar **HD = baij->hd,*baij_a; 661 const MatScalar *v_t,*value; 662 #if defined(PETSC_USE_BOPT_g) 663 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 664 #endif 665 666 PetscFunctionBegin; 667 668 if (roworiented) { 669 stepval = (n-1)*bs; 670 } else { 671 stepval = (m-1)*bs; 672 } 673 for (i=0; i<m; i++) { 674 #if defined(PETSC_USE_BOPT_g) 675 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 676 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 677 #endif 678 row = im[i]; 679 v_t = v + i*bs2; 680 if (row >= rstart && row < rend) { 681 for (j=0; j<n; j++) { 682 col = in[j]; 683 684 /* Look up into the Hash Table */ 685 key = row*Nbs+col+1; 686 h1 = HASH(size,key,tmp); 687 688 idx = h1; 689 #if defined(PETSC_USE_BOPT_g) 690 total_ct++; 691 insert_ct++; 692 if (HT[idx] != key) { 693 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694 if (idx == size) { 695 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696 if (idx == h1) { 697 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 698 } 699 } 700 } 701 #else 702 if (HT[idx] != key) { 703 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704 if (idx == size) { 705 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706 if (idx == h1) { 707 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 708 } 709 } 710 } 711 #endif 712 baij_a = HD[idx]; 713 if (roworiented) { 714 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715 /* value = v + (i*(stepval+bs)+j)*bs; */ 716 value = v_t; 717 v_t += bs; 718 if (addv == ADD_VALUES) { 719 for (ii=0; ii<bs; ii++,value+=stepval) { 720 for (jj=ii; jj<bs2; jj+=bs) { 721 baij_a[jj] += *value++; 722 } 723 } 724 } else { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 baij_a[jj] = *value++; 728 } 729 } 730 } 731 } else { 732 value = v + j*(stepval+bs)*bs + i*bs; 733 if (addv == ADD_VALUES) { 734 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 735 for (jj=0; jj<bs; jj++) { 736 baij_a[jj] += *value++; 737 } 738 } 739 } else { 740 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741 for (jj=0; jj<bs; jj++) { 742 baij_a[jj] = *value++; 743 } 744 } 745 } 746 } 747 } 748 } else { 749 if (!baij->donotstash) { 750 if (roworiented) { 751 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 752 } else { 753 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 754 } 755 } 756 } 757 } 758 #if defined(PETSC_USE_BOPT_g) 759 baij->ht_total_ct = total_ct; 760 baij->ht_insert_ct = insert_ct; 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetValues_MPIBAIJ" 767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 770 PetscErrorCode ierr; 771 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 772 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773 774 PetscFunctionBegin; 775 for (i=0; i<m; i++) { 776 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 777 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779 row = idxm[i] - bsrstart; 780 for (j=0; j<n; j++) { 781 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 782 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783 if (idxn[j] >= bscstart && idxn[j] < bscend){ 784 col = idxn[j] - bscstart; 785 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } else { 787 if (!baij->colmap) { 788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789 } 790 #if defined (PETSC_USE_CTABLE) 791 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792 data --; 793 #else 794 data = baij->colmap[idxn[j]/bs]-1; 795 #endif 796 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797 else { 798 col = data + idxn[j]%bs; 799 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800 } 801 } 802 } 803 } else { 804 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatNorm_MPIBAIJ" 812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813 { 814 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816 PetscErrorCode ierr; 817 PetscInt i,bs2=baij->bs2; 818 PetscReal sum = 0.0; 819 MatScalar *v; 820 821 PetscFunctionBegin; 822 if (baij->size == 1) { 823 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 v = amat->a; 827 for (i=0; i<amat->nz*bs2; i++) { 828 #if defined(PETSC_USE_COMPLEX) 829 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 830 #else 831 sum += (*v)*(*v); v++; 832 #endif 833 } 834 v = bmat->a; 835 for (i=0; i<bmat->nz*bs2; i++) { 836 #if defined(PETSC_USE_COMPLEX) 837 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838 #else 839 sum += (*v)*(*v); v++; 840 #endif 841 } 842 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843 *nrm = sqrt(*nrm); 844 } else { 845 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 846 } 847 } 848 PetscFunctionReturn(0); 849 } 850 851 852 /* 853 Creates the hash table, and sets the table 854 This table is created only once. 855 If new entried need to be added to the matrix 856 then the hash table has to be destroyed and 857 recreated. 858 */ 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 862 { 863 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 864 Mat A = baij->A,B=baij->B; 865 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 866 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 867 PetscErrorCode ierr; 868 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 869 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 870 PetscInt *HT,key; 871 MatScalar **HD; 872 PetscReal tmp; 873 #if defined(PETSC_USE_BOPT_g) 874 PetscInt ct=0,max=0; 875 #endif 876 877 PetscFunctionBegin; 878 baij->ht_size=(PetscInt)(factor*nz); 879 size = baij->ht_size; 880 881 if (baij->ht) { 882 PetscFunctionReturn(0); 883 } 884 885 /* Allocate Memory for Hash Table */ 886 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 887 baij->ht = (PetscInt*)(baij->hd + size); 888 HD = baij->hd; 889 HT = baij->ht; 890 891 892 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 893 894 895 /* Loop Over A */ 896 for (i=0; i<a->mbs; i++) { 897 for (j=ai[i]; j<ai[i+1]; j++) { 898 row = i+rstart; 899 col = aj[j]+cstart; 900 901 key = row*Nbs + col + 1; 902 h1 = HASH(size,key,tmp); 903 for (k=0; k<size; k++){ 904 if (!HT[(h1+k)%size]) { 905 HT[(h1+k)%size] = key; 906 HD[(h1+k)%size] = a->a + j*bs2; 907 break; 908 #if defined(PETSC_USE_BOPT_g) 909 } else { 910 ct++; 911 #endif 912 } 913 } 914 #if defined(PETSC_USE_BOPT_g) 915 if (k> max) max = k; 916 #endif 917 } 918 } 919 /* Loop Over B */ 920 for (i=0; i<b->mbs; i++) { 921 for (j=bi[i]; j<bi[i+1]; j++) { 922 row = i+rstart; 923 col = garray[bj[j]]; 924 key = row*Nbs + col + 1; 925 h1 = HASH(size,key,tmp); 926 for (k=0; k<size; k++){ 927 if (!HT[(h1+k)%size]) { 928 HT[(h1+k)%size] = key; 929 HD[(h1+k)%size] = b->a + j*bs2; 930 break; 931 #if defined(PETSC_USE_BOPT_g) 932 } else { 933 ct++; 934 #endif 935 } 936 } 937 #if defined(PETSC_USE_BOPT_g) 938 if (k> max) max = k; 939 #endif 940 } 941 } 942 943 /* Print Summary */ 944 #if defined(PETSC_USE_BOPT_g) 945 for (i=0,j=0; i<size; i++) { 946 if (HT[i]) {j++;} 947 } 948 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max); 949 #endif 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 956 { 957 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 958 PetscErrorCode ierr; 959 PetscInt nstash,reallocs; 960 InsertMode addv; 961 962 PetscFunctionBegin; 963 if (baij->donotstash) { 964 PetscFunctionReturn(0); 965 } 966 967 /* make sure all processors are either in INSERTMODE or ADDMODE */ 968 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 969 if (addv == (ADD_VALUES|INSERT_VALUES)) { 970 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 971 } 972 mat->insertmode = addv; /* in case this processor had no cache */ 973 974 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 975 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 976 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 977 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs); 978 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 979 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 985 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 986 { 987 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 988 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 989 PetscErrorCode ierr; 990 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 991 PetscInt *row,*col,other_disassembled; 992 PetscTruth r1,r2,r3; 993 MatScalar *val; 994 InsertMode addv = mat->insertmode; 995 PetscMPIInt n; 996 997 PetscFunctionBegin; 998 if (!baij->donotstash) { 999 while (1) { 1000 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1001 if (!flg) break; 1002 1003 for (i=0; i<n;) { 1004 /* Now identify the consecutive vals belonging to the same row */ 1005 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1006 if (j < n) ncols = j-i; 1007 else ncols = n-i; 1008 /* Now assemble all these values with a single function call */ 1009 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1010 i = j; 1011 } 1012 } 1013 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1014 /* Now process the block-stash. Since the values are stashed column-oriented, 1015 set the roworiented flag to column oriented, and after MatSetValues() 1016 restore the original flags */ 1017 r1 = baij->roworiented; 1018 r2 = a->roworiented; 1019 r3 = b->roworiented; 1020 baij->roworiented = PETSC_FALSE; 1021 a->roworiented = PETSC_FALSE; 1022 b->roworiented = PETSC_FALSE; 1023 while (1) { 1024 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1025 if (!flg) break; 1026 1027 for (i=0; i<n;) { 1028 /* Now identify the consecutive vals belonging to the same row */ 1029 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1030 if (j < n) ncols = j-i; 1031 else ncols = n-i; 1032 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1033 i = j; 1034 } 1035 } 1036 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1037 baij->roworiented = r1; 1038 a->roworiented = r2; 1039 b->roworiented = r3; 1040 } 1041 1042 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1044 1045 /* determine if any processor has disassembled, if so we must 1046 also disassemble ourselfs, in order that we may reassemble. */ 1047 /* 1048 if nonzero structure of submatrix B cannot change then we know that 1049 no processor disassembled thus we can skip this stuff 1050 */ 1051 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1052 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1053 if (mat->was_assembled && !other_disassembled) { 1054 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1055 } 1056 } 1057 1058 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1059 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1060 } 1061 b->compressedrow.use = PETSC_TRUE; 1062 b->compressedrow.checked = PETSC_FALSE; 1063 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1064 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1065 1066 #if defined(PETSC_USE_BOPT_g) 1067 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1068 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1069 baij->ht_total_ct = 0; 1070 baij->ht_insert_ct = 0; 1071 } 1072 #endif 1073 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1074 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1075 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1076 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1077 } 1078 1079 if (baij->rowvalues) { 1080 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1081 baij->rowvalues = 0; 1082 } 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1088 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1089 { 1090 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1091 PetscErrorCode ierr; 1092 PetscMPIInt size = baij->size,rank = baij->rank; 1093 PetscInt bs = mat->bs; 1094 PetscTruth iascii,isdraw; 1095 PetscViewer sviewer; 1096 PetscViewerFormat format; 1097 1098 PetscFunctionBegin; 1099 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1100 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1101 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1102 if (iascii) { 1103 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1104 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1105 MatInfo info; 1106 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1107 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1108 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1109 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1110 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1111 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1112 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1113 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1114 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1115 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1116 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1119 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1122 PetscFunctionReturn(0); 1123 } 1124 } 1125 1126 if (isdraw) { 1127 PetscDraw draw; 1128 PetscTruth isnull; 1129 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1130 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1131 } 1132 1133 if (size == 1) { 1134 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1135 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1136 } else { 1137 /* assemble the entire matrix onto first processor. */ 1138 Mat A; 1139 Mat_SeqBAIJ *Aloc; 1140 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1141 MatScalar *a; 1142 1143 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1144 /* Perhaps this should be the type of mat? */ 1145 if (!rank) { 1146 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1147 } else { 1148 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1149 } 1150 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1151 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1152 PetscLogObjectParent(mat,A); 1153 1154 /* copy over the A part */ 1155 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1156 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1157 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1158 1159 for (i=0; i<mbs; i++) { 1160 rvals[0] = bs*(baij->rstart + i); 1161 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1162 for (j=ai[i]; j<ai[i+1]; j++) { 1163 col = (baij->cstart+aj[j])*bs; 1164 for (k=0; k<bs; k++) { 1165 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1166 col++; a += bs; 1167 } 1168 } 1169 } 1170 /* copy over the B part */ 1171 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1172 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1173 for (i=0; i<mbs; i++) { 1174 rvals[0] = bs*(baij->rstart + i); 1175 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1176 for (j=ai[i]; j<ai[i+1]; j++) { 1177 col = baij->garray[aj[j]]*bs; 1178 for (k=0; k<bs; k++) { 1179 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1180 col++; a += bs; 1181 } 1182 } 1183 } 1184 ierr = PetscFree(rvals);CHKERRQ(ierr); 1185 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1186 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1187 /* 1188 Everyone has to call to draw the matrix since the graphics waits are 1189 synchronized across all processors that share the PetscDraw object 1190 */ 1191 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1192 if (!rank) { 1193 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1194 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1195 } 1196 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1197 ierr = MatDestroy(A);CHKERRQ(ierr); 1198 } 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatView_MPIBAIJ" 1204 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1205 { 1206 PetscErrorCode ierr; 1207 PetscTruth iascii,isdraw,issocket,isbinary; 1208 1209 PetscFunctionBegin; 1210 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1211 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1212 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1213 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1214 if (iascii || isdraw || issocket || isbinary) { 1215 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1216 } else { 1217 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1224 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1225 { 1226 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1227 PetscErrorCode ierr; 1228 1229 PetscFunctionBegin; 1230 #if defined(PETSC_USE_LOG) 1231 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1232 #endif 1233 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1234 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1235 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1236 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1237 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1238 #if defined (PETSC_USE_CTABLE) 1239 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1240 #else 1241 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1242 #endif 1243 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1244 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1245 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1246 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1247 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1248 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1249 #if defined(PETSC_USE_MAT_SINGLE) 1250 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1251 #endif 1252 ierr = PetscFree(baij);CHKERRQ(ierr); 1253 1254 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1255 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1256 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1257 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1258 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1259 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1260 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatMult_MPIBAIJ" 1266 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1267 { 1268 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1269 PetscErrorCode ierr; 1270 PetscInt nt; 1271 1272 PetscFunctionBegin; 1273 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1274 if (nt != A->n) { 1275 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1276 } 1277 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1278 if (nt != A->m) { 1279 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1280 } 1281 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1282 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1283 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1284 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1285 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1291 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1292 { 1293 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1298 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1300 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1306 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1307 { 1308 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 /* do nondiagonal part */ 1313 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1314 /* send it on its way */ 1315 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1316 /* do local part */ 1317 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1318 /* receive remote parts: note this assumes the values are not actually */ 1319 /* inserted in yy until the next line, which is true for my implementation*/ 1320 /* but is not perhaps always true. */ 1321 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1327 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1328 { 1329 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 /* do nondiagonal part */ 1334 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1335 /* send it on its way */ 1336 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1337 /* do local part */ 1338 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1339 /* receive remote parts: note this assumes the values are not actually */ 1340 /* inserted in yy until the next line, which is true for my implementation*/ 1341 /* but is not perhaps always true. */ 1342 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 /* 1347 This only works correctly for square matrices where the subblock A->A is the 1348 diagonal block 1349 */ 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1352 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1353 { 1354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1355 PetscErrorCode ierr; 1356 1357 PetscFunctionBegin; 1358 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1359 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatScale_MPIBAIJ" 1365 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1366 { 1367 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1372 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1378 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1379 { 1380 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1381 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1382 PetscErrorCode ierr; 1383 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1384 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1385 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1386 1387 PetscFunctionBegin; 1388 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1389 mat->getrowactive = PETSC_TRUE; 1390 1391 if (!mat->rowvalues && (idx || v)) { 1392 /* 1393 allocate enough space to hold information from the longest row. 1394 */ 1395 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1396 PetscInt max = 1,mbs = mat->mbs,tmp; 1397 for (i=0; i<mbs; i++) { 1398 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1399 if (max < tmp) { max = tmp; } 1400 } 1401 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1402 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1403 } 1404 1405 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1406 lrow = row - brstart; 1407 1408 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1409 if (!v) {pvA = 0; pvB = 0;} 1410 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1411 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1412 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1413 nztot = nzA + nzB; 1414 1415 cmap = mat->garray; 1416 if (v || idx) { 1417 if (nztot) { 1418 /* Sort by increasing column numbers, assuming A and B already sorted */ 1419 PetscInt imark = -1; 1420 if (v) { 1421 *v = v_p = mat->rowvalues; 1422 for (i=0; i<nzB; i++) { 1423 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1424 else break; 1425 } 1426 imark = i; 1427 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1428 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1429 } 1430 if (idx) { 1431 *idx = idx_p = mat->rowindices; 1432 if (imark > -1) { 1433 for (i=0; i<imark; i++) { 1434 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1435 } 1436 } else { 1437 for (i=0; i<nzB; i++) { 1438 if (cmap[cworkB[i]/bs] < cstart) 1439 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1440 else break; 1441 } 1442 imark = i; 1443 } 1444 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1445 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1446 } 1447 } else { 1448 if (idx) *idx = 0; 1449 if (v) *v = 0; 1450 } 1451 } 1452 *nz = nztot; 1453 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1454 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1460 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1461 { 1462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1463 1464 PetscFunctionBegin; 1465 if (baij->getrowactive == PETSC_FALSE) { 1466 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1467 } 1468 baij->getrowactive = PETSC_FALSE; 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1474 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1475 { 1476 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 1479 PetscFunctionBegin; 1480 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1481 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1487 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1488 { 1489 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1490 Mat A = a->A,B = a->B; 1491 PetscErrorCode ierr; 1492 PetscReal isend[5],irecv[5]; 1493 1494 PetscFunctionBegin; 1495 info->block_size = (PetscReal)matin->bs; 1496 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1497 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1498 isend[3] = info->memory; isend[4] = info->mallocs; 1499 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1500 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1501 isend[3] += info->memory; isend[4] += info->mallocs; 1502 if (flag == MAT_LOCAL) { 1503 info->nz_used = isend[0]; 1504 info->nz_allocated = isend[1]; 1505 info->nz_unneeded = isend[2]; 1506 info->memory = isend[3]; 1507 info->mallocs = isend[4]; 1508 } else if (flag == MAT_GLOBAL_MAX) { 1509 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1510 info->nz_used = irecv[0]; 1511 info->nz_allocated = irecv[1]; 1512 info->nz_unneeded = irecv[2]; 1513 info->memory = irecv[3]; 1514 info->mallocs = irecv[4]; 1515 } else if (flag == MAT_GLOBAL_SUM) { 1516 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1517 info->nz_used = irecv[0]; 1518 info->nz_allocated = irecv[1]; 1519 info->nz_unneeded = irecv[2]; 1520 info->memory = irecv[3]; 1521 info->mallocs = irecv[4]; 1522 } else { 1523 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1524 } 1525 info->rows_global = (PetscReal)A->M; 1526 info->columns_global = (PetscReal)A->N; 1527 info->rows_local = (PetscReal)A->m; 1528 info->columns_local = (PetscReal)A->N; 1529 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1530 info->fill_ratio_needed = 0; 1531 info->factor_mallocs = 0; 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1537 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1538 { 1539 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 switch (op) { 1544 case MAT_NO_NEW_NONZERO_LOCATIONS: 1545 case MAT_YES_NEW_NONZERO_LOCATIONS: 1546 case MAT_COLUMNS_UNSORTED: 1547 case MAT_COLUMNS_SORTED: 1548 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1549 case MAT_KEEP_ZEROED_ROWS: 1550 case MAT_NEW_NONZERO_LOCATION_ERR: 1551 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1552 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1553 break; 1554 case MAT_ROW_ORIENTED: 1555 a->roworiented = PETSC_TRUE; 1556 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1557 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1558 break; 1559 case MAT_ROWS_SORTED: 1560 case MAT_ROWS_UNSORTED: 1561 case MAT_YES_NEW_DIAGONALS: 1562 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1563 break; 1564 case MAT_COLUMN_ORIENTED: 1565 a->roworiented = PETSC_FALSE; 1566 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1567 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1568 break; 1569 case MAT_IGNORE_OFF_PROC_ENTRIES: 1570 a->donotstash = PETSC_TRUE; 1571 break; 1572 case MAT_NO_NEW_DIAGONALS: 1573 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1574 case MAT_USE_HASH_TABLE: 1575 a->ht_flag = PETSC_TRUE; 1576 break; 1577 case MAT_SYMMETRIC: 1578 case MAT_STRUCTURALLY_SYMMETRIC: 1579 case MAT_HERMITIAN: 1580 case MAT_SYMMETRY_ETERNAL: 1581 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1582 break; 1583 case MAT_NOT_SYMMETRIC: 1584 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1585 case MAT_NOT_HERMITIAN: 1586 case MAT_NOT_SYMMETRY_ETERNAL: 1587 break; 1588 default: 1589 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1590 } 1591 PetscFunctionReturn(0); 1592 } 1593 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1596 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1597 { 1598 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1599 Mat_SeqBAIJ *Aloc; 1600 Mat B; 1601 PetscErrorCode ierr; 1602 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1603 PetscInt bs=A->bs,mbs=baij->mbs; 1604 MatScalar *a; 1605 1606 PetscFunctionBegin; 1607 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1608 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1609 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1610 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1611 1612 /* copy over the A part */ 1613 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1614 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1615 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1616 1617 for (i=0; i<mbs; i++) { 1618 rvals[0] = bs*(baij->rstart + i); 1619 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1620 for (j=ai[i]; j<ai[i+1]; j++) { 1621 col = (baij->cstart+aj[j])*bs; 1622 for (k=0; k<bs; k++) { 1623 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1624 col++; a += bs; 1625 } 1626 } 1627 } 1628 /* copy over the B part */ 1629 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1630 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1631 for (i=0; i<mbs; i++) { 1632 rvals[0] = bs*(baij->rstart + i); 1633 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1634 for (j=ai[i]; j<ai[i+1]; j++) { 1635 col = baij->garray[aj[j]]*bs; 1636 for (k=0; k<bs; k++) { 1637 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1638 col++; a += bs; 1639 } 1640 } 1641 } 1642 ierr = PetscFree(rvals);CHKERRQ(ierr); 1643 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1645 1646 if (matout) { 1647 *matout = B; 1648 } else { 1649 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1650 } 1651 PetscFunctionReturn(0); 1652 } 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1656 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1657 { 1658 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1659 Mat a = baij->A,b = baij->B; 1660 PetscErrorCode ierr; 1661 PetscInt s1,s2,s3; 1662 1663 PetscFunctionBegin; 1664 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1665 if (rr) { 1666 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1667 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1668 /* Overlap communication with computation. */ 1669 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1670 } 1671 if (ll) { 1672 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1673 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1674 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1675 } 1676 /* scale the diagonal block */ 1677 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1678 1679 if (rr) { 1680 /* Do a scatter end and then right scale the off-diagonal block */ 1681 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1682 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1683 } 1684 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1690 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1691 { 1692 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1693 PetscErrorCode ierr; 1694 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1695 PetscInt i,N,*rows,*owners = l->rowners; 1696 PetscInt *nprocs,j,idx,nsends,row; 1697 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1698 PetscInt *rvalues,tag = A->tag,count,base,slen,*source; 1699 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1700 MPI_Comm comm = A->comm; 1701 MPI_Request *send_waits,*recv_waits; 1702 MPI_Status recv_status,*send_status; 1703 IS istmp; 1704 PetscTruth found; 1705 1706 PetscFunctionBegin; 1707 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1708 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1709 1710 /* first count number of contributors to each processor */ 1711 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1712 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1713 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1714 for (i=0; i<N; i++) { 1715 idx = rows[i]; 1716 found = PETSC_FALSE; 1717 for (j=0; j<size; j++) { 1718 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1719 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1720 } 1721 } 1722 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1723 } 1724 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1725 1726 /* inform other processors of number of messages and max length*/ 1727 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1728 1729 /* post receives: */ 1730 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1731 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1732 for (i=0; i<nrecvs; i++) { 1733 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1734 } 1735 1736 /* do sends: 1737 1) starts[i] gives the starting index in svalues for stuff going to 1738 the ith processor 1739 */ 1740 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1741 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1742 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1743 starts[0] = 0; 1744 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1745 for (i=0; i<N; i++) { 1746 svalues[starts[owner[i]]++] = rows[i]; 1747 } 1748 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1749 1750 starts[0] = 0; 1751 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1752 count = 0; 1753 for (i=0; i<size; i++) { 1754 if (nprocs[2*i+1]) { 1755 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1756 } 1757 } 1758 ierr = PetscFree(starts);CHKERRQ(ierr); 1759 1760 base = owners[rank]*bs; 1761 1762 /* wait on receives */ 1763 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1764 source = lens + nrecvs; 1765 count = nrecvs; slen = 0; 1766 while (count) { 1767 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1768 /* unpack receives into our local space */ 1769 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1770 source[imdex] = recv_status.MPI_SOURCE; 1771 lens[imdex] = n; 1772 slen += n; 1773 count--; 1774 } 1775 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1776 1777 /* move the data into the send scatter */ 1778 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1779 count = 0; 1780 for (i=0; i<nrecvs; i++) { 1781 values = rvalues + i*nmax; 1782 for (j=0; j<lens[i]; j++) { 1783 lrows[count++] = values[j] - base; 1784 } 1785 } 1786 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1787 ierr = PetscFree(lens);CHKERRQ(ierr); 1788 ierr = PetscFree(owner);CHKERRQ(ierr); 1789 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1790 1791 /* actually zap the local rows */ 1792 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1793 PetscLogObjectParent(A,istmp); 1794 1795 /* 1796 Zero the required rows. If the "diagonal block" of the matrix 1797 is square and the user wishes to set the diagonal we use seperate 1798 code so that MatSetValues() is not called for each diagonal allocating 1799 new memory, thus calling lots of mallocs and slowing things down. 1800 1801 Contributed by: Mathew Knepley 1802 */ 1803 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1804 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1805 if (diag && (l->A->M == l->A->N)) { 1806 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1807 } else if (diag) { 1808 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1809 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1810 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1811 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1812 } 1813 for (i=0; i<slen; i++) { 1814 row = lrows[i] + rstart_bs; 1815 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1816 } 1817 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1818 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 } else { 1820 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1821 } 1822 1823 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1824 ierr = PetscFree(lrows);CHKERRQ(ierr); 1825 1826 /* wait on sends */ 1827 if (nsends) { 1828 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1829 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1830 ierr = PetscFree(send_status);CHKERRQ(ierr); 1831 } 1832 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1833 ierr = PetscFree(svalues);CHKERRQ(ierr); 1834 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1840 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1841 { 1842 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1843 MPI_Comm comm = A->comm; 1844 static PetscTruth called = PETSC_FALSE; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 if (!a->rank) { 1849 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1850 } 1851 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1852 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1853 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1859 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1860 { 1861 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 1869 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatEqual_MPIBAIJ" 1873 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1874 { 1875 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1876 Mat a,b,c,d; 1877 PetscTruth flg; 1878 PetscErrorCode ierr; 1879 1880 PetscFunctionBegin; 1881 a = matA->A; b = matA->B; 1882 c = matB->A; d = matB->B; 1883 1884 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1885 if (flg == PETSC_TRUE) { 1886 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1887 } 1888 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1895 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1896 { 1897 PetscErrorCode ierr; 1898 1899 PetscFunctionBegin; 1900 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 /* -------------------------------------------------------------------*/ 1905 static struct _MatOps MatOps_Values = { 1906 MatSetValues_MPIBAIJ, 1907 MatGetRow_MPIBAIJ, 1908 MatRestoreRow_MPIBAIJ, 1909 MatMult_MPIBAIJ, 1910 /* 4*/ MatMultAdd_MPIBAIJ, 1911 MatMultTranspose_MPIBAIJ, 1912 MatMultTransposeAdd_MPIBAIJ, 1913 0, 1914 0, 1915 0, 1916 /*10*/ 0, 1917 0, 1918 0, 1919 0, 1920 MatTranspose_MPIBAIJ, 1921 /*15*/ MatGetInfo_MPIBAIJ, 1922 MatEqual_MPIBAIJ, 1923 MatGetDiagonal_MPIBAIJ, 1924 MatDiagonalScale_MPIBAIJ, 1925 MatNorm_MPIBAIJ, 1926 /*20*/ MatAssemblyBegin_MPIBAIJ, 1927 MatAssemblyEnd_MPIBAIJ, 1928 0, 1929 MatSetOption_MPIBAIJ, 1930 MatZeroEntries_MPIBAIJ, 1931 /*25*/ MatZeroRows_MPIBAIJ, 1932 0, 1933 0, 1934 0, 1935 0, 1936 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1937 0, 1938 0, 1939 0, 1940 0, 1941 /*35*/ MatDuplicate_MPIBAIJ, 1942 0, 1943 0, 1944 0, 1945 0, 1946 /*40*/ 0, 1947 MatGetSubMatrices_MPIBAIJ, 1948 MatIncreaseOverlap_MPIBAIJ, 1949 MatGetValues_MPIBAIJ, 1950 0, 1951 /*45*/ MatPrintHelp_MPIBAIJ, 1952 MatScale_MPIBAIJ, 1953 0, 1954 0, 1955 0, 1956 /*50*/ 0, 1957 0, 1958 0, 1959 0, 1960 0, 1961 /*55*/ 0, 1962 0, 1963 MatSetUnfactored_MPIBAIJ, 1964 0, 1965 MatSetValuesBlocked_MPIBAIJ, 1966 /*60*/ 0, 1967 MatDestroy_MPIBAIJ, 1968 MatView_MPIBAIJ, 1969 MatGetPetscMaps_Petsc, 1970 0, 1971 /*65*/ 0, 1972 0, 1973 0, 1974 0, 1975 0, 1976 /*70*/ MatGetRowMax_MPIBAIJ, 1977 0, 1978 0, 1979 0, 1980 0, 1981 /*75*/ 0, 1982 0, 1983 0, 1984 0, 1985 0, 1986 /*80*/ 0, 1987 0, 1988 0, 1989 0, 1990 MatLoad_MPIBAIJ, 1991 /*85*/ 0, 1992 0, 1993 0, 1994 0, 1995 0, 1996 /*90*/ 0, 1997 0, 1998 0, 1999 0, 2000 0, 2001 /*95*/ 0, 2002 0, 2003 0, 2004 0}; 2005 2006 2007 EXTERN_C_BEGIN 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2010 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2011 { 2012 PetscFunctionBegin; 2013 *a = ((Mat_MPIBAIJ *)A->data)->A; 2014 *iscopy = PETSC_FALSE; 2015 PetscFunctionReturn(0); 2016 } 2017 EXTERN_C_END 2018 2019 EXTERN_C_BEGIN 2020 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 2021 EXTERN_C_END 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2025 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2026 { 2027 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2028 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2029 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2030 const PetscInt *JJ; 2031 PetscScalar *values; 2032 PetscErrorCode ierr; 2033 2034 PetscFunctionBegin; 2035 #if defined(PETSC_OPT_g) 2036 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2037 #endif 2038 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2039 o_nnz = d_nnz + m; 2040 2041 for (i=0; i<m; i++) { 2042 nnz = I[i+1]- I[i]; 2043 JJ = J + I[i]; 2044 nnz_max = PetscMax(nnz_max,nnz); 2045 #if defined(PETSC_OPT_g) 2046 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2047 #endif 2048 for (j=0; j<nnz; j++) { 2049 if (*JJ >= cstart) break; 2050 JJ++; 2051 } 2052 d = 0; 2053 for (; j<nnz; j++) { 2054 if (*JJ++ >= cend) break; 2055 d++; 2056 } 2057 d_nnz[i] = d; 2058 o_nnz[i] = nnz - d; 2059 } 2060 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2061 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2062 2063 if (v) values = (PetscScalar*)v; 2064 else { 2065 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2066 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2067 } 2068 2069 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2070 for (i=0; i<m; i++) { 2071 ii = i + rstart; 2072 nnz = I[i+1]- I[i]; 2073 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2074 } 2075 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2078 2079 if (!v) { 2080 ierr = PetscFree(values);CHKERRQ(ierr); 2081 } 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2087 /*@C 2088 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2089 (the default parallel PETSc format). 2090 2091 Collective on MPI_Comm 2092 2093 Input Parameters: 2094 + A - the matrix 2095 . i - the indices into j for the start of each local row (starts with zero) 2096 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2097 - v - optional values in the matrix 2098 2099 Level: developer 2100 2101 .keywords: matrix, aij, compressed row, sparse, parallel 2102 2103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2104 @*/ 2105 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2106 { 2107 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2108 2109 PetscFunctionBegin; 2110 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2111 if (f) { 2112 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2113 } 2114 PetscFunctionReturn(0); 2115 } 2116 2117 EXTERN_C_BEGIN 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2120 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2121 { 2122 Mat_MPIBAIJ *b; 2123 PetscErrorCode ierr; 2124 PetscInt i; 2125 2126 PetscFunctionBegin; 2127 B->preallocated = PETSC_TRUE; 2128 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2129 2130 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2131 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2132 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2133 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2134 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2135 if (d_nnz) { 2136 for (i=0; i<B->m/bs; i++) { 2137 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2138 } 2139 } 2140 if (o_nnz) { 2141 for (i=0; i<B->m/bs; i++) { 2142 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2143 } 2144 } 2145 2146 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2147 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2148 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2149 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2150 2151 b = (Mat_MPIBAIJ*)B->data; 2152 B->bs = bs; 2153 b->bs2 = bs*bs; 2154 b->mbs = B->m/bs; 2155 b->nbs = B->n/bs; 2156 b->Mbs = B->M/bs; 2157 b->Nbs = B->N/bs; 2158 2159 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2160 b->rowners[0] = 0; 2161 for (i=2; i<=b->size; i++) { 2162 b->rowners[i] += b->rowners[i-1]; 2163 } 2164 b->rstart = b->rowners[b->rank]; 2165 b->rend = b->rowners[b->rank+1]; 2166 2167 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2168 b->cowners[0] = 0; 2169 for (i=2; i<=b->size; i++) { 2170 b->cowners[i] += b->cowners[i-1]; 2171 } 2172 b->cstart = b->cowners[b->rank]; 2173 b->cend = b->cowners[b->rank+1]; 2174 2175 for (i=0; i<=b->size; i++) { 2176 b->rowners_bs[i] = b->rowners[i]*bs; 2177 } 2178 b->rstart_bs = b->rstart*bs; 2179 b->rend_bs = b->rend*bs; 2180 b->cstart_bs = b->cstart*bs; 2181 b->cend_bs = b->cend*bs; 2182 2183 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2184 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2185 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2186 PetscLogObjectParent(B,b->A); 2187 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2188 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2189 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2190 PetscLogObjectParent(B,b->B); 2191 2192 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2193 2194 PetscFunctionReturn(0); 2195 } 2196 EXTERN_C_END 2197 2198 EXTERN_C_BEGIN 2199 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2200 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2201 EXTERN_C_END 2202 2203 /*MC 2204 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2205 2206 Options Database Keys: 2207 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2208 2209 Level: beginner 2210 2211 .seealso: MatCreateMPIBAIJ 2212 M*/ 2213 2214 EXTERN_C_BEGIN 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "MatCreate_MPIBAIJ" 2217 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2218 { 2219 Mat_MPIBAIJ *b; 2220 PetscErrorCode ierr; 2221 PetscTruth flg; 2222 2223 PetscFunctionBegin; 2224 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2225 B->data = (void*)b; 2226 2227 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2228 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2229 B->mapping = 0; 2230 B->factor = 0; 2231 B->assembled = PETSC_FALSE; 2232 2233 B->insertmode = NOT_SET_VALUES; 2234 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2235 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2236 2237 /* build local table of row and column ownerships */ 2238 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2239 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2240 b->cowners = b->rowners + b->size + 2; 2241 b->rowners_bs = b->cowners + b->size + 2; 2242 2243 /* build cache for off array entries formed */ 2244 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2245 b->donotstash = PETSC_FALSE; 2246 b->colmap = PETSC_NULL; 2247 b->garray = PETSC_NULL; 2248 b->roworiented = PETSC_TRUE; 2249 2250 #if defined(PETSC_USE_MAT_SINGLE) 2251 /* stuff for MatSetValues_XXX in single precision */ 2252 b->setvalueslen = 0; 2253 b->setvaluescopy = PETSC_NULL; 2254 #endif 2255 2256 /* stuff used in block assembly */ 2257 b->barray = 0; 2258 2259 /* stuff used for matrix vector multiply */ 2260 b->lvec = 0; 2261 b->Mvctx = 0; 2262 2263 /* stuff for MatGetRow() */ 2264 b->rowindices = 0; 2265 b->rowvalues = 0; 2266 b->getrowactive = PETSC_FALSE; 2267 2268 /* hash table stuff */ 2269 b->ht = 0; 2270 b->hd = 0; 2271 b->ht_size = 0; 2272 b->ht_flag = PETSC_FALSE; 2273 b->ht_fact = 0; 2274 b->ht_total_ct = 0; 2275 b->ht_insert_ct = 0; 2276 2277 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2278 if (flg) { 2279 PetscReal fact = 1.39; 2280 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2281 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2282 if (fact <= 1.0) fact = 1.39; 2283 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2284 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2285 } 2286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2287 "MatStoreValues_MPIBAIJ", 2288 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2289 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2290 "MatRetrieveValues_MPIBAIJ", 2291 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2292 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2293 "MatGetDiagonalBlock_MPIBAIJ", 2294 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2295 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2296 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2297 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2298 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2299 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2300 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2301 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2302 "MatDiagonalScaleLocal_MPIBAIJ", 2303 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2304 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2305 "MatSetHashTableFactor_MPIBAIJ", 2306 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2307 PetscFunctionReturn(0); 2308 } 2309 EXTERN_C_END 2310 2311 /*MC 2312 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2313 2314 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2315 and MATMPIBAIJ otherwise. 2316 2317 Options Database Keys: 2318 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2319 2320 Level: beginner 2321 2322 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2323 M*/ 2324 2325 EXTERN_C_BEGIN 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "MatCreate_BAIJ" 2328 PetscErrorCode MatCreate_BAIJ(Mat A) 2329 { 2330 PetscErrorCode ierr; 2331 PetscMPIInt size; 2332 2333 PetscFunctionBegin; 2334 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2335 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2336 if (size == 1) { 2337 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2338 } else { 2339 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2340 } 2341 PetscFunctionReturn(0); 2342 } 2343 EXTERN_C_END 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2347 /*@C 2348 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2349 (block compressed row). For good matrix assembly performance 2350 the user should preallocate the matrix storage by setting the parameters 2351 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2352 performance can be increased by more than a factor of 50. 2353 2354 Collective on Mat 2355 2356 Input Parameters: 2357 + A - the matrix 2358 . bs - size of blockk 2359 . d_nz - number of block nonzeros per block row in diagonal portion of local 2360 submatrix (same for all local rows) 2361 . d_nnz - array containing the number of block nonzeros in the various block rows 2362 of the in diagonal portion of the local (possibly different for each block 2363 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2364 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2365 submatrix (same for all local rows). 2366 - o_nnz - array containing the number of nonzeros in the various block rows of the 2367 off-diagonal portion of the local submatrix (possibly different for 2368 each block row) or PETSC_NULL. 2369 2370 If the *_nnz parameter is given then the *_nz parameter is ignored 2371 2372 Options Database Keys: 2373 . -mat_no_unroll - uses code that does not unroll the loops in the 2374 block calculations (much slower) 2375 . -mat_block_size - size of the blocks to use 2376 2377 Notes: 2378 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2379 than it must be used on all processors that share the object for that argument. 2380 2381 Storage Information: 2382 For a square global matrix we define each processor's diagonal portion 2383 to be its local rows and the corresponding columns (a square submatrix); 2384 each processor's off-diagonal portion encompasses the remainder of the 2385 local matrix (a rectangular submatrix). 2386 2387 The user can specify preallocated storage for the diagonal part of 2388 the local submatrix with either d_nz or d_nnz (not both). Set 2389 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2390 memory allocation. Likewise, specify preallocated storage for the 2391 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2392 2393 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2394 the figure below we depict these three local rows and all columns (0-11). 2395 2396 .vb 2397 0 1 2 3 4 5 6 7 8 9 10 11 2398 ------------------- 2399 row 3 | o o o d d d o o o o o o 2400 row 4 | o o o d d d o o o o o o 2401 row 5 | o o o d d d o o o o o o 2402 ------------------- 2403 .ve 2404 2405 Thus, any entries in the d locations are stored in the d (diagonal) 2406 submatrix, and any entries in the o locations are stored in the 2407 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2408 stored simply in the MATSEQBAIJ format for compressed row storage. 2409 2410 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2411 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2412 In general, for PDE problems in which most nonzeros are near the diagonal, 2413 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2414 or you will get TERRIBLE performance; see the users' manual chapter on 2415 matrices. 2416 2417 Level: intermediate 2418 2419 .keywords: matrix, block, aij, compressed row, sparse, parallel 2420 2421 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2422 @*/ 2423 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2424 { 2425 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2426 2427 PetscFunctionBegin; 2428 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2429 if (f) { 2430 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2431 } 2432 PetscFunctionReturn(0); 2433 } 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatCreateMPIBAIJ" 2437 /*@C 2438 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2439 (block compressed row). For good matrix assembly performance 2440 the user should preallocate the matrix storage by setting the parameters 2441 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2442 performance can be increased by more than a factor of 50. 2443 2444 Collective on MPI_Comm 2445 2446 Input Parameters: 2447 + comm - MPI communicator 2448 . bs - size of blockk 2449 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2450 This value should be the same as the local size used in creating the 2451 y vector for the matrix-vector product y = Ax. 2452 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2453 This value should be the same as the local size used in creating the 2454 x vector for the matrix-vector product y = Ax. 2455 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2456 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2457 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2458 submatrix (same for all local rows) 2459 . d_nnz - array containing the number of nonzero blocks in the various block rows 2460 of the in diagonal portion of the local (possibly different for each block 2461 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2462 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2463 submatrix (same for all local rows). 2464 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2465 off-diagonal portion of the local submatrix (possibly different for 2466 each block row) or PETSC_NULL. 2467 2468 Output Parameter: 2469 . A - the matrix 2470 2471 Options Database Keys: 2472 . -mat_no_unroll - uses code that does not unroll the loops in the 2473 block calculations (much slower) 2474 . -mat_block_size - size of the blocks to use 2475 2476 Notes: 2477 If the *_nnz parameter is given then the *_nz parameter is ignored 2478 2479 A nonzero block is any block that as 1 or more nonzeros in it 2480 2481 The user MUST specify either the local or global matrix dimensions 2482 (possibly both). 2483 2484 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2485 than it must be used on all processors that share the object for that argument. 2486 2487 Storage Information: 2488 For a square global matrix we define each processor's diagonal portion 2489 to be its local rows and the corresponding columns (a square submatrix); 2490 each processor's off-diagonal portion encompasses the remainder of the 2491 local matrix (a rectangular submatrix). 2492 2493 The user can specify preallocated storage for the diagonal part of 2494 the local submatrix with either d_nz or d_nnz (not both). Set 2495 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2496 memory allocation. Likewise, specify preallocated storage for the 2497 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2498 2499 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2500 the figure below we depict these three local rows and all columns (0-11). 2501 2502 .vb 2503 0 1 2 3 4 5 6 7 8 9 10 11 2504 ------------------- 2505 row 3 | o o o d d d o o o o o o 2506 row 4 | o o o d d d o o o o o o 2507 row 5 | o o o d d d o o o o o o 2508 ------------------- 2509 .ve 2510 2511 Thus, any entries in the d locations are stored in the d (diagonal) 2512 submatrix, and any entries in the o locations are stored in the 2513 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2514 stored simply in the MATSEQBAIJ format for compressed row storage. 2515 2516 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2517 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2518 In general, for PDE problems in which most nonzeros are near the diagonal, 2519 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2520 or you will get TERRIBLE performance; see the users' manual chapter on 2521 matrices. 2522 2523 Level: intermediate 2524 2525 .keywords: matrix, block, aij, compressed row, sparse, parallel 2526 2527 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2528 @*/ 2529 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2530 { 2531 PetscErrorCode ierr; 2532 PetscMPIInt size; 2533 2534 PetscFunctionBegin; 2535 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2536 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2537 if (size > 1) { 2538 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2539 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2540 } else { 2541 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2542 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2543 } 2544 PetscFunctionReturn(0); 2545 } 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2549 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2550 { 2551 Mat mat; 2552 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2553 PetscErrorCode ierr; 2554 PetscInt len=0; 2555 2556 PetscFunctionBegin; 2557 *newmat = 0; 2558 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2559 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2560 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2561 2562 mat->factor = matin->factor; 2563 mat->preallocated = PETSC_TRUE; 2564 mat->assembled = PETSC_TRUE; 2565 mat->insertmode = NOT_SET_VALUES; 2566 2567 a = (Mat_MPIBAIJ*)mat->data; 2568 mat->bs = matin->bs; 2569 a->bs2 = oldmat->bs2; 2570 a->mbs = oldmat->mbs; 2571 a->nbs = oldmat->nbs; 2572 a->Mbs = oldmat->Mbs; 2573 a->Nbs = oldmat->Nbs; 2574 2575 a->rstart = oldmat->rstart; 2576 a->rend = oldmat->rend; 2577 a->cstart = oldmat->cstart; 2578 a->cend = oldmat->cend; 2579 a->size = oldmat->size; 2580 a->rank = oldmat->rank; 2581 a->donotstash = oldmat->donotstash; 2582 a->roworiented = oldmat->roworiented; 2583 a->rowindices = 0; 2584 a->rowvalues = 0; 2585 a->getrowactive = PETSC_FALSE; 2586 a->barray = 0; 2587 a->rstart_bs = oldmat->rstart_bs; 2588 a->rend_bs = oldmat->rend_bs; 2589 a->cstart_bs = oldmat->cstart_bs; 2590 a->cend_bs = oldmat->cend_bs; 2591 2592 /* hash table stuff */ 2593 a->ht = 0; 2594 a->hd = 0; 2595 a->ht_size = 0; 2596 a->ht_flag = oldmat->ht_flag; 2597 a->ht_fact = oldmat->ht_fact; 2598 a->ht_total_ct = 0; 2599 a->ht_insert_ct = 0; 2600 2601 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2602 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2603 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2604 if (oldmat->colmap) { 2605 #if defined (PETSC_USE_CTABLE) 2606 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2607 #else 2608 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2609 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt)); 2610 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2611 #endif 2612 } else a->colmap = 0; 2613 2614 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2615 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2616 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 2617 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2618 } else a->garray = 0; 2619 2620 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2621 PetscLogObjectParent(mat,a->lvec); 2622 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2623 PetscLogObjectParent(mat,a->Mvctx); 2624 2625 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2626 PetscLogObjectParent(mat,a->A); 2627 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2628 PetscLogObjectParent(mat,a->B); 2629 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2630 *newmat = mat; 2631 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #include "petscsys.h" 2636 2637 #undef __FUNCT__ 2638 #define __FUNCT__ "MatLoad_MPIBAIJ" 2639 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2640 { 2641 Mat A; 2642 PetscErrorCode ierr; 2643 int fd; 2644 PetscInt i,nz,j,rstart,rend; 2645 PetscScalar *vals,*buf; 2646 MPI_Comm comm = ((PetscObject)viewer)->comm; 2647 MPI_Status status; 2648 PetscMPIInt rank,size,maxnz; 2649 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2650 PetscInt *locrowlens,*procsnz = 0,*browners; 2651 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows; 2652 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2653 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2654 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2655 2656 PetscFunctionBegin; 2657 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2658 2659 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2660 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2661 if (!rank) { 2662 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2663 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2664 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2665 } 2666 2667 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2668 M = header[1]; N = header[2]; 2669 2670 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2671 2672 /* 2673 This code adds extra rows to make sure the number of rows is 2674 divisible by the blocksize 2675 */ 2676 Mbs = M/bs; 2677 extra_rows = bs - M + bs*Mbs; 2678 if (extra_rows == bs) extra_rows = 0; 2679 else Mbs++; 2680 if (extra_rows && !rank) { 2681 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2682 } 2683 2684 /* determine ownership of all rows */ 2685 mbs = Mbs/size + ((Mbs % size) > rank); 2686 m = mbs*bs; 2687 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2688 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2689 rowners[0] = 0; 2690 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2691 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2692 rstart = rowners[rank]; 2693 rend = rowners[rank+1]; 2694 2695 /* distribute row lengths to all processors */ 2696 ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2697 if (!rank) { 2698 mend = m; 2699 if (size == 1) mend = mend - extra_rows; 2700 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2701 for (j=mend; j<m; j++) locrowlens[j] = 1; 2702 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2703 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2704 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2705 for (j=0; j<m; j++) { 2706 procsnz[0] += locrowlens[j]; 2707 } 2708 for (i=1; i<size; i++) { 2709 mend = browners[i+1] - browners[i]; 2710 if (i == size-1) mend = mend - extra_rows; 2711 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2712 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2713 /* calculate the number of nonzeros on each processor */ 2714 for (j=0; j<browners[i+1]-browners[i]; j++) { 2715 procsnz[i] += rowlengths[j]; 2716 } 2717 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2718 } 2719 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2720 } else { 2721 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2722 } 2723 2724 if (!rank) { 2725 /* determine max buffer needed and allocate it */ 2726 maxnz = procsnz[0]; 2727 for (i=1; i<size; i++) { 2728 maxnz = PetscMax(maxnz,procsnz[i]); 2729 } 2730 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2731 2732 /* read in my part of the matrix column indices */ 2733 nz = procsnz[0]; 2734 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2735 mycols = ibuf; 2736 if (size == 1) nz -= extra_rows; 2737 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2738 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2739 2740 /* read in every ones (except the last) and ship off */ 2741 for (i=1; i<size-1; i++) { 2742 nz = procsnz[i]; 2743 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2744 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2745 } 2746 /* read in the stuff for the last proc */ 2747 if (size != 1) { 2748 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2749 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2750 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2751 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2752 } 2753 ierr = PetscFree(cols);CHKERRQ(ierr); 2754 } else { 2755 /* determine buffer space needed for message */ 2756 nz = 0; 2757 for (i=0; i<m; i++) { 2758 nz += locrowlens[i]; 2759 } 2760 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2761 mycols = ibuf; 2762 /* receive message of column indices*/ 2763 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2764 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2765 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2766 } 2767 2768 /* loop over local rows, determining number of off diagonal entries */ 2769 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2770 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2771 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2772 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2773 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2774 rowcount = 0; nzcount = 0; 2775 for (i=0; i<mbs; i++) { 2776 dcount = 0; 2777 odcount = 0; 2778 for (j=0; j<bs; j++) { 2779 kmax = locrowlens[rowcount]; 2780 for (k=0; k<kmax; k++) { 2781 tmp = mycols[nzcount++]/bs; 2782 if (!mask[tmp]) { 2783 mask[tmp] = 1; 2784 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2785 else masked1[dcount++] = tmp; 2786 } 2787 } 2788 rowcount++; 2789 } 2790 2791 dlens[i] = dcount; 2792 odlens[i] = odcount; 2793 2794 /* zero out the mask elements we set */ 2795 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2796 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2797 } 2798 2799 /* create our matrix */ 2800 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2801 ierr = MatSetType(A,type);CHKERRQ(ierr) 2802 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2803 2804 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2805 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2806 2807 if (!rank) { 2808 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2809 /* read in my part of the matrix numerical values */ 2810 nz = procsnz[0]; 2811 vals = buf; 2812 mycols = ibuf; 2813 if (size == 1) nz -= extra_rows; 2814 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2815 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2816 2817 /* insert into matrix */ 2818 jj = rstart*bs; 2819 for (i=0; i<m; i++) { 2820 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2821 mycols += locrowlens[i]; 2822 vals += locrowlens[i]; 2823 jj++; 2824 } 2825 /* read in other processors (except the last one) and ship out */ 2826 for (i=1; i<size-1; i++) { 2827 nz = procsnz[i]; 2828 vals = buf; 2829 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2830 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2831 } 2832 /* the last proc */ 2833 if (size != 1){ 2834 nz = procsnz[i] - extra_rows; 2835 vals = buf; 2836 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2837 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2838 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2839 } 2840 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2841 } else { 2842 /* receive numeric values */ 2843 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2844 2845 /* receive message of values*/ 2846 vals = buf; 2847 mycols = ibuf; 2848 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2849 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2850 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2851 2852 /* insert into matrix */ 2853 jj = rstart*bs; 2854 for (i=0; i<m; i++) { 2855 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 mycols += locrowlens[i]; 2857 vals += locrowlens[i]; 2858 jj++; 2859 } 2860 } 2861 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2862 ierr = PetscFree(buf);CHKERRQ(ierr); 2863 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2864 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2865 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2866 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2867 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2868 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2869 2870 *newmat = A; 2871 PetscFunctionReturn(0); 2872 } 2873 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2876 /*@ 2877 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2878 2879 Input Parameters: 2880 . mat - the matrix 2881 . fact - factor 2882 2883 Collective on Mat 2884 2885 Level: advanced 2886 2887 Notes: 2888 This can also be set by the command line option: -mat_use_hash_table fact 2889 2890 .keywords: matrix, hashtable, factor, HT 2891 2892 .seealso: MatSetOption() 2893 @*/ 2894 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2895 { 2896 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2897 2898 PetscFunctionBegin; 2899 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2900 if (f) { 2901 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2902 } 2903 PetscFunctionReturn(0); 2904 } 2905 2906 #undef __FUNCT__ 2907 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2908 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2909 { 2910 Mat_MPIBAIJ *baij; 2911 2912 PetscFunctionBegin; 2913 baij = (Mat_MPIBAIJ*)mat->data; 2914 baij->ht_fact = fact; 2915 PetscFunctionReturn(0); 2916 } 2917 2918 #undef __FUNCT__ 2919 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2920 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2921 { 2922 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2923 PetscFunctionBegin; 2924 *Ad = a->A; 2925 *Ao = a->B; 2926 *colmap = a->garray; 2927 PetscFunctionReturn(0); 2928 } 2929