1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 PetscErrorCode ierr; 110 PetscInt nbs = B->nbs,i,bs=mat->bs; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120 PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatSetValues_MPIBAIJ" 285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt i,N = m*n; 290 MatScalar *vsingle; 291 292 PetscFunctionBegin; 293 if (N > b->setvalueslen) { 294 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 295 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 296 b->setvalueslen = N; 297 } 298 vsingle = b->setvaluescopy; 299 300 for (i=0; i<N; i++) { 301 vsingle[i] = v[i]; 302 } 303 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 310 { 311 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 312 PetscErrorCode ierr; 313 PetscInt i,N = m*n*b->bs2; 314 MatScalar *vsingle; 315 316 PetscFunctionBegin; 317 if (N > b->setvalueslen) { 318 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 319 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 320 b->setvalueslen = N; 321 } 322 vsingle = b->setvaluescopy; 323 for (i=0; i<N; i++) { 324 vsingle[i] = v[i]; 325 } 326 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 333 { 334 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 335 PetscErrorCode ierr; 336 PetscInt i,N = m*n; 337 MatScalar *vsingle; 338 339 PetscFunctionBegin; 340 if (N > b->setvalueslen) { 341 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 342 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 343 b->setvalueslen = N; 344 } 345 vsingle = b->setvaluescopy; 346 for (i=0; i<N; i++) { 347 vsingle[i] = v[i]; 348 } 349 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 356 { 357 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 358 PetscErrorCode ierr; 359 PetscInt i,N = m*n*b->bs2; 360 MatScalar *vsingle; 361 362 PetscFunctionBegin; 363 if (N > b->setvalueslen) { 364 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 365 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 366 b->setvalueslen = N; 367 } 368 vsingle = b->setvaluescopy; 369 for (i=0; i<N; i++) { 370 vsingle[i] = v[i]; 371 } 372 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 #endif 376 377 #undef __FUNCT__ 378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 380 { 381 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 382 MatScalar value; 383 PetscTruth roworiented = baij->roworiented; 384 PetscErrorCode ierr; 385 PetscInt i,j,row,col; 386 PetscInt rstart_orig=baij->rstart_bs; 387 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 388 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 389 390 /* Some Variables required in the macro */ 391 Mat A = baij->A; 392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 393 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 394 MatScalar *aa=a->a; 395 396 Mat B = baij->B; 397 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 398 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 399 MatScalar *ba=b->a; 400 401 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 402 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 403 MatScalar *ap,*bap; 404 405 PetscFunctionBegin; 406 for (i=0; i<m; i++) { 407 if (im[i] < 0) continue; 408 #if defined(PETSC_USE_BOPT_g) 409 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 410 #endif 411 if (im[i] >= rstart_orig && im[i] < rend_orig) { 412 row = im[i] - rstart_orig; 413 for (j=0; j<n; j++) { 414 if (in[j] >= cstart_orig && in[j] < cend_orig){ 415 col = in[j] - cstart_orig; 416 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 417 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 418 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 419 } else if (in[j] < 0) continue; 420 #if defined(PETSC_USE_BOPT_g) 421 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 422 #endif 423 else { 424 if (mat->was_assembled) { 425 if (!baij->colmap) { 426 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 427 } 428 #if defined (PETSC_USE_CTABLE) 429 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 430 col = col - 1; 431 #else 432 col = baij->colmap[in[j]/bs] - 1; 433 #endif 434 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 435 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 436 col = in[j]; 437 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 438 B = baij->B; 439 b = (Mat_SeqBAIJ*)(B)->data; 440 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 441 ba=b->a; 442 } else col += in[j]%bs; 443 } else col = in[j]; 444 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 445 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 446 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 447 } 448 } 449 } else { 450 if (!baij->donotstash) { 451 if (roworiented) { 452 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453 } else { 454 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 455 } 456 } 457 } 458 } 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 const MatScalar *value; 468 MatScalar *barray=baij->barray; 469 PetscTruth roworiented = baij->roworiented; 470 PetscErrorCode ierr; 471 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 472 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 473 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 474 475 PetscFunctionBegin; 476 if(!barray) { 477 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 478 baij->barray = barray; 479 } 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 if (im[i] < 0) continue; 488 #if defined(PETSC_USE_BOPT_g) 489 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 if (im[i] >= rstart && im[i] < rend) { 492 row = im[i] - rstart; 493 for (j=0; j<n; j++) { 494 /* If NumCol = 1 then a copy is not required */ 495 if ((roworiented) && (n == 1)) { 496 barray = (MatScalar*)v + i*bs2; 497 } else if((!roworiented) && (m == 1)) { 498 barray = (MatScalar*)v + j*bs2; 499 } else { /* Here a copy is required */ 500 if (roworiented) { 501 value = v + i*(stepval+bs)*bs + j*bs; 502 } else { 503 value = v + j*(stepval+bs)*bs + i*bs; 504 } 505 for (ii=0; ii<bs; ii++,value+=stepval) { 506 for (jj=0; jj<bs; jj++) { 507 *barray++ = *value++; 508 } 509 } 510 barray -=bs2; 511 } 512 513 if (in[j] >= cstart && in[j] < cend){ 514 col = in[j] - cstart; 515 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 else if (in[j] < 0) continue; 518 #if defined(PETSC_USE_BOPT_g) 519 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 520 #endif 521 else { 522 if (mat->was_assembled) { 523 if (!baij->colmap) { 524 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 525 } 526 527 #if defined(PETSC_USE_BOPT_g) 528 #if defined (PETSC_USE_CTABLE) 529 { PetscInt data; 530 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 531 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 532 } 533 #else 534 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 535 #endif 536 #endif 537 #if defined (PETSC_USE_CTABLE) 538 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 539 col = (col - 1)/bs; 540 #else 541 col = (baij->colmap[in[j]] - 1)/bs; 542 #endif 543 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 544 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 545 col = in[j]; 546 } 547 } 548 else col = in[j]; 549 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 550 } 551 } 552 } else { 553 if (!baij->donotstash) { 554 if (roworiented) { 555 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556 } else { 557 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #define HASH_KEY 0.6180339887 566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 572 { 573 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 574 PetscTruth roworiented = baij->roworiented; 575 PetscErrorCode ierr; 576 PetscInt i,j,row,col; 577 PetscInt rstart_orig=baij->rstart_bs; 578 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 579 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 580 PetscReal tmp; 581 MatScalar **HD = baij->hd,value; 582 #if defined(PETSC_USE_BOPT_g) 583 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 584 #endif 585 586 PetscFunctionBegin; 587 588 for (i=0; i<m; i++) { 589 #if defined(PETSC_USE_BOPT_g) 590 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 591 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 592 #endif 593 row = im[i]; 594 if (row >= rstart_orig && row < rend_orig) { 595 for (j=0; j<n; j++) { 596 col = in[j]; 597 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 598 /* Look up PetscInto the Hash Table */ 599 key = (row/bs)*Nbs+(col/bs)+1; 600 h1 = HASH(size,key,tmp); 601 602 603 idx = h1; 604 #if defined(PETSC_USE_BOPT_g) 605 insert_ct++; 606 total_ct++; 607 if (HT[idx] != key) { 608 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 609 if (idx == size) { 610 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 611 if (idx == h1) { 612 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 613 } 614 } 615 } 616 #else 617 if (HT[idx] != key) { 618 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 619 if (idx == size) { 620 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 621 if (idx == h1) { 622 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623 } 624 } 625 } 626 #endif 627 /* A HASH table entry is found, so insert the values at the correct address */ 628 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 629 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 630 } 631 } else { 632 if (!baij->donotstash) { 633 if (roworiented) { 634 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 635 } else { 636 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 637 } 638 } 639 } 640 } 641 #if defined(PETSC_USE_BOPT_g) 642 baij->ht_total_ct = total_ct; 643 baij->ht_insert_ct = insert_ct; 644 #endif 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 651 { 652 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 653 PetscTruth roworiented = baij->roworiented; 654 PetscErrorCode ierr; 655 PetscInt i,j,ii,jj,row,col; 656 PetscInt rstart=baij->rstart ; 657 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 658 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659 PetscReal tmp; 660 MatScalar **HD = baij->hd,*baij_a; 661 const MatScalar *v_t,*value; 662 #if defined(PETSC_USE_BOPT_g) 663 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 664 #endif 665 666 PetscFunctionBegin; 667 668 if (roworiented) { 669 stepval = (n-1)*bs; 670 } else { 671 stepval = (m-1)*bs; 672 } 673 for (i=0; i<m; i++) { 674 #if defined(PETSC_USE_BOPT_g) 675 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 676 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 677 #endif 678 row = im[i]; 679 v_t = v + i*bs2; 680 if (row >= rstart && row < rend) { 681 for (j=0; j<n; j++) { 682 col = in[j]; 683 684 /* Look up into the Hash Table */ 685 key = row*Nbs+col+1; 686 h1 = HASH(size,key,tmp); 687 688 idx = h1; 689 #if defined(PETSC_USE_BOPT_g) 690 total_ct++; 691 insert_ct++; 692 if (HT[idx] != key) { 693 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694 if (idx == size) { 695 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696 if (idx == h1) { 697 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 698 } 699 } 700 } 701 #else 702 if (HT[idx] != key) { 703 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704 if (idx == size) { 705 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706 if (idx == h1) { 707 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 708 } 709 } 710 } 711 #endif 712 baij_a = HD[idx]; 713 if (roworiented) { 714 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715 /* value = v + (i*(stepval+bs)+j)*bs; */ 716 value = v_t; 717 v_t += bs; 718 if (addv == ADD_VALUES) { 719 for (ii=0; ii<bs; ii++,value+=stepval) { 720 for (jj=ii; jj<bs2; jj+=bs) { 721 baij_a[jj] += *value++; 722 } 723 } 724 } else { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 baij_a[jj] = *value++; 728 } 729 } 730 } 731 } else { 732 value = v + j*(stepval+bs)*bs + i*bs; 733 if (addv == ADD_VALUES) { 734 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 735 for (jj=0; jj<bs; jj++) { 736 baij_a[jj] += *value++; 737 } 738 } 739 } else { 740 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741 for (jj=0; jj<bs; jj++) { 742 baij_a[jj] = *value++; 743 } 744 } 745 } 746 } 747 } 748 } else { 749 if (!baij->donotstash) { 750 if (roworiented) { 751 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 752 } else { 753 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 754 } 755 } 756 } 757 } 758 #if defined(PETSC_USE_BOPT_g) 759 baij->ht_total_ct = total_ct; 760 baij->ht_insert_ct = insert_ct; 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetValues_MPIBAIJ" 767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 770 PetscErrorCode ierr; 771 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 772 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773 774 PetscFunctionBegin; 775 for (i=0; i<m; i++) { 776 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 777 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779 row = idxm[i] - bsrstart; 780 for (j=0; j<n; j++) { 781 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 782 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783 if (idxn[j] >= bscstart && idxn[j] < bscend){ 784 col = idxn[j] - bscstart; 785 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } else { 787 if (!baij->colmap) { 788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789 } 790 #if defined (PETSC_USE_CTABLE) 791 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792 data --; 793 #else 794 data = baij->colmap[idxn[j]/bs]-1; 795 #endif 796 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797 else { 798 col = data + idxn[j]%bs; 799 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800 } 801 } 802 } 803 } else { 804 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatNorm_MPIBAIJ" 812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813 { 814 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816 PetscErrorCode ierr; 817 PetscInt i,bs2=baij->bs2; 818 PetscReal sum = 0.0; 819 MatScalar *v; 820 821 PetscFunctionBegin; 822 if (baij->size == 1) { 823 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 v = amat->a; 827 for (i=0; i<amat->nz*bs2; i++) { 828 #if defined(PETSC_USE_COMPLEX) 829 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 830 #else 831 sum += (*v)*(*v); v++; 832 #endif 833 } 834 v = bmat->a; 835 for (i=0; i<bmat->nz*bs2; i++) { 836 #if defined(PETSC_USE_COMPLEX) 837 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838 #else 839 sum += (*v)*(*v); v++; 840 #endif 841 } 842 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843 *nrm = sqrt(*nrm); 844 } else { 845 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 846 } 847 } 848 PetscFunctionReturn(0); 849 } 850 851 852 /* 853 Creates the hash table, and sets the table 854 This table is created only once. 855 If new entried need to be added to the matrix 856 then the hash table has to be destroyed and 857 recreated. 858 */ 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 862 { 863 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 864 Mat A = baij->A,B=baij->B; 865 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 866 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 867 PetscErrorCode ierr; 868 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 869 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 870 PetscInt *HT,key; 871 MatScalar **HD; 872 PetscReal tmp; 873 #if defined(PETSC_USE_BOPT_g) 874 PetscInt ct=0,max=0; 875 #endif 876 877 PetscFunctionBegin; 878 baij->ht_size=(PetscInt)(factor*nz); 879 size = baij->ht_size; 880 881 if (baij->ht) { 882 PetscFunctionReturn(0); 883 } 884 885 /* Allocate Memory for Hash Table */ 886 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 887 baij->ht = (PetscInt*)(baij->hd + size); 888 HD = baij->hd; 889 HT = baij->ht; 890 891 892 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 893 894 895 /* Loop Over A */ 896 for (i=0; i<a->mbs; i++) { 897 for (j=ai[i]; j<ai[i+1]; j++) { 898 row = i+rstart; 899 col = aj[j]+cstart; 900 901 key = row*Nbs + col + 1; 902 h1 = HASH(size,key,tmp); 903 for (k=0; k<size; k++){ 904 if (!HT[(h1+k)%size]) { 905 HT[(h1+k)%size] = key; 906 HD[(h1+k)%size] = a->a + j*bs2; 907 break; 908 #if defined(PETSC_USE_BOPT_g) 909 } else { 910 ct++; 911 #endif 912 } 913 } 914 #if defined(PETSC_USE_BOPT_g) 915 if (k> max) max = k; 916 #endif 917 } 918 } 919 /* Loop Over B */ 920 for (i=0; i<b->mbs; i++) { 921 for (j=bi[i]; j<bi[i+1]; j++) { 922 row = i+rstart; 923 col = garray[bj[j]]; 924 key = row*Nbs + col + 1; 925 h1 = HASH(size,key,tmp); 926 for (k=0; k<size; k++){ 927 if (!HT[(h1+k)%size]) { 928 HT[(h1+k)%size] = key; 929 HD[(h1+k)%size] = b->a + j*bs2; 930 break; 931 #if defined(PETSC_USE_BOPT_g) 932 } else { 933 ct++; 934 #endif 935 } 936 } 937 #if defined(PETSC_USE_BOPT_g) 938 if (k> max) max = k; 939 #endif 940 } 941 } 942 943 /* Print Summary */ 944 #if defined(PETSC_USE_BOPT_g) 945 for (i=0,j=0; i<size; i++) { 946 if (HT[i]) {j++;} 947 } 948 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max); 949 #endif 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 956 { 957 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 958 PetscErrorCode ierr; 959 PetscInt nstash,reallocs; 960 InsertMode addv; 961 962 PetscFunctionBegin; 963 if (baij->donotstash) { 964 PetscFunctionReturn(0); 965 } 966 967 /* make sure all processors are either in INSERTMODE or ADDMODE */ 968 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 969 if (addv == (ADD_VALUES|INSERT_VALUES)) { 970 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 971 } 972 mat->insertmode = addv; /* in case this processor had no cache */ 973 974 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 975 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 976 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 977 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs); 978 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 979 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 985 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 986 { 987 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 988 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 989 PetscErrorCode ierr; 990 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 991 PetscInt *row,*col,other_disassembled; 992 PetscTruth r1,r2,r3; 993 MatScalar *val; 994 InsertMode addv = mat->insertmode; 995 PetscMPIInt n; 996 997 PetscFunctionBegin; 998 if (!baij->donotstash) { 999 while (1) { 1000 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1001 if (!flg) break; 1002 1003 for (i=0; i<n;) { 1004 /* Now identify the consecutive vals belonging to the same row */ 1005 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1006 if (j < n) ncols = j-i; 1007 else ncols = n-i; 1008 /* Now assemble all these values with a single function call */ 1009 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1010 i = j; 1011 } 1012 } 1013 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1014 /* Now process the block-stash. Since the values are stashed column-oriented, 1015 set the roworiented flag to column oriented, and after MatSetValues() 1016 restore the original flags */ 1017 r1 = baij->roworiented; 1018 r2 = a->roworiented; 1019 r3 = b->roworiented; 1020 baij->roworiented = PETSC_FALSE; 1021 a->roworiented = PETSC_FALSE; 1022 b->roworiented = PETSC_FALSE; 1023 while (1) { 1024 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1025 if (!flg) break; 1026 1027 for (i=0; i<n;) { 1028 /* Now identify the consecutive vals belonging to the same row */ 1029 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1030 if (j < n) ncols = j-i; 1031 else ncols = n-i; 1032 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1033 i = j; 1034 } 1035 } 1036 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1037 baij->roworiented = r1; 1038 a->roworiented = r2; 1039 b->roworiented = r3; 1040 } 1041 1042 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1044 1045 /* determine if any processor has disassembled, if so we must 1046 also disassemble ourselfs, in order that we may reassemble. */ 1047 /* 1048 if nonzero structure of submatrix B cannot change then we know that 1049 no processor disassembled thus we can skip this stuff 1050 */ 1051 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1052 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1053 if (mat->was_assembled && !other_disassembled) { 1054 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1055 } 1056 } 1057 1058 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1059 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1060 } 1061 b->compressedrow.use = PETSC_TRUE; 1062 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1063 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1064 1065 #if defined(PETSC_USE_BOPT_g) 1066 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1067 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1068 baij->ht_total_ct = 0; 1069 baij->ht_insert_ct = 0; 1070 } 1071 #endif 1072 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1073 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1074 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1075 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1076 } 1077 1078 if (baij->rowvalues) { 1079 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1080 baij->rowvalues = 0; 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1087 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1088 { 1089 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1090 PetscErrorCode ierr; 1091 PetscMPIInt size = baij->size,rank = baij->rank; 1092 PetscInt bs = mat->bs; 1093 PetscTruth iascii,isdraw; 1094 PetscViewer sviewer; 1095 PetscViewerFormat format; 1096 1097 PetscFunctionBegin; 1098 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1099 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1100 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1101 if (iascii) { 1102 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1103 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1104 MatInfo info; 1105 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1106 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1107 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1108 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1109 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1110 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1111 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1112 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1113 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1115 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1118 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1121 PetscFunctionReturn(0); 1122 } 1123 } 1124 1125 if (isdraw) { 1126 PetscDraw draw; 1127 PetscTruth isnull; 1128 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1129 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1130 } 1131 1132 if (size == 1) { 1133 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1134 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1135 } else { 1136 /* assemble the entire matrix onto first processor. */ 1137 Mat A; 1138 Mat_SeqBAIJ *Aloc; 1139 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1140 MatScalar *a; 1141 1142 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1143 /* Perhaps this should be the type of mat? */ 1144 if (!rank) { 1145 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1146 } else { 1147 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1148 } 1149 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1150 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1151 PetscLogObjectParent(mat,A); 1152 1153 /* copy over the A part */ 1154 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1155 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1156 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1157 1158 for (i=0; i<mbs; i++) { 1159 rvals[0] = bs*(baij->rstart + i); 1160 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1161 for (j=ai[i]; j<ai[i+1]; j++) { 1162 col = (baij->cstart+aj[j])*bs; 1163 for (k=0; k<bs; k++) { 1164 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1165 col++; a += bs; 1166 } 1167 } 1168 } 1169 /* copy over the B part */ 1170 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1171 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1172 for (i=0; i<mbs; i++) { 1173 rvals[0] = bs*(baij->rstart + i); 1174 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1175 for (j=ai[i]; j<ai[i+1]; j++) { 1176 col = baij->garray[aj[j]]*bs; 1177 for (k=0; k<bs; k++) { 1178 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1179 col++; a += bs; 1180 } 1181 } 1182 } 1183 ierr = PetscFree(rvals);CHKERRQ(ierr); 1184 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1185 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1186 /* 1187 Everyone has to call to draw the matrix since the graphics waits are 1188 synchronized across all processors that share the PetscDraw object 1189 */ 1190 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1191 if (!rank) { 1192 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1193 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1194 } 1195 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1196 ierr = MatDestroy(A);CHKERRQ(ierr); 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatView_MPIBAIJ" 1203 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1204 { 1205 PetscErrorCode ierr; 1206 PetscTruth iascii,isdraw,issocket,isbinary; 1207 1208 PetscFunctionBegin; 1209 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1210 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1211 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1212 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1213 if (iascii || isdraw || issocket || isbinary) { 1214 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1215 } else { 1216 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1223 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1224 { 1225 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 #if defined(PETSC_USE_LOG) 1230 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1231 #endif 1232 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1233 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1234 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1235 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1236 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1237 #if defined (PETSC_USE_CTABLE) 1238 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1239 #else 1240 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1241 #endif 1242 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1243 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1244 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1245 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1246 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1247 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1248 #if defined(PETSC_USE_MAT_SINGLE) 1249 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1250 #endif 1251 ierr = PetscFree(baij);CHKERRQ(ierr); 1252 1253 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1255 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1256 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1257 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1258 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1259 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatMult_MPIBAIJ" 1265 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1266 { 1267 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1268 PetscErrorCode ierr; 1269 PetscInt nt; 1270 1271 PetscFunctionBegin; 1272 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1273 if (nt != A->n) { 1274 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1275 } 1276 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1277 if (nt != A->m) { 1278 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1279 } 1280 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1281 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1282 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1283 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1284 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1290 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1291 { 1292 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1293 PetscErrorCode ierr; 1294 1295 PetscFunctionBegin; 1296 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1297 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1298 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1299 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1305 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1306 { 1307 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 1310 PetscFunctionBegin; 1311 /* do nondiagonal part */ 1312 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1313 /* send it on its way */ 1314 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1315 /* do local part */ 1316 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1317 /* receive remote parts: note this assumes the values are not actually */ 1318 /* inserted in yy until the next line, which is true for my implementation*/ 1319 /* but is not perhaps always true. */ 1320 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1326 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 /* do nondiagonal part */ 1333 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1334 /* send it on its way */ 1335 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1336 /* do local part */ 1337 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1338 /* receive remote parts: note this assumes the values are not actually */ 1339 /* inserted in yy until the next line, which is true for my implementation*/ 1340 /* but is not perhaps always true. */ 1341 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /* 1346 This only works correctly for square matrices where the subblock A->A is the 1347 diagonal block 1348 */ 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1351 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1352 { 1353 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1358 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatScale_MPIBAIJ" 1364 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1365 { 1366 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1371 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1377 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1378 { 1379 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1380 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1381 PetscErrorCode ierr; 1382 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1383 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1384 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1385 1386 PetscFunctionBegin; 1387 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1388 mat->getrowactive = PETSC_TRUE; 1389 1390 if (!mat->rowvalues && (idx || v)) { 1391 /* 1392 allocate enough space to hold information from the longest row. 1393 */ 1394 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1395 PetscInt max = 1,mbs = mat->mbs,tmp; 1396 for (i=0; i<mbs; i++) { 1397 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1398 if (max < tmp) { max = tmp; } 1399 } 1400 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1401 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1402 } 1403 1404 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1405 lrow = row - brstart; 1406 1407 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1408 if (!v) {pvA = 0; pvB = 0;} 1409 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1410 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1411 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1412 nztot = nzA + nzB; 1413 1414 cmap = mat->garray; 1415 if (v || idx) { 1416 if (nztot) { 1417 /* Sort by increasing column numbers, assuming A and B already sorted */ 1418 PetscInt imark = -1; 1419 if (v) { 1420 *v = v_p = mat->rowvalues; 1421 for (i=0; i<nzB; i++) { 1422 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1423 else break; 1424 } 1425 imark = i; 1426 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1427 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1428 } 1429 if (idx) { 1430 *idx = idx_p = mat->rowindices; 1431 if (imark > -1) { 1432 for (i=0; i<imark; i++) { 1433 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1434 } 1435 } else { 1436 for (i=0; i<nzB; i++) { 1437 if (cmap[cworkB[i]/bs] < cstart) 1438 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1439 else break; 1440 } 1441 imark = i; 1442 } 1443 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1444 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1445 } 1446 } else { 1447 if (idx) *idx = 0; 1448 if (v) *v = 0; 1449 } 1450 } 1451 *nz = nztot; 1452 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1453 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1459 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1460 { 1461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1462 1463 PetscFunctionBegin; 1464 if (baij->getrowactive == PETSC_FALSE) { 1465 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1466 } 1467 baij->getrowactive = PETSC_FALSE; 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1473 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1474 { 1475 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1480 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 #undef __FUNCT__ 1485 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1486 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1487 { 1488 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1489 Mat A = a->A,B = a->B; 1490 PetscErrorCode ierr; 1491 PetscReal isend[5],irecv[5]; 1492 1493 PetscFunctionBegin; 1494 info->block_size = (PetscReal)matin->bs; 1495 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1496 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1497 isend[3] = info->memory; isend[4] = info->mallocs; 1498 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1499 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1500 isend[3] += info->memory; isend[4] += info->mallocs; 1501 if (flag == MAT_LOCAL) { 1502 info->nz_used = isend[0]; 1503 info->nz_allocated = isend[1]; 1504 info->nz_unneeded = isend[2]; 1505 info->memory = isend[3]; 1506 info->mallocs = isend[4]; 1507 } else if (flag == MAT_GLOBAL_MAX) { 1508 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1509 info->nz_used = irecv[0]; 1510 info->nz_allocated = irecv[1]; 1511 info->nz_unneeded = irecv[2]; 1512 info->memory = irecv[3]; 1513 info->mallocs = irecv[4]; 1514 } else if (flag == MAT_GLOBAL_SUM) { 1515 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1516 info->nz_used = irecv[0]; 1517 info->nz_allocated = irecv[1]; 1518 info->nz_unneeded = irecv[2]; 1519 info->memory = irecv[3]; 1520 info->mallocs = irecv[4]; 1521 } else { 1522 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1523 } 1524 info->rows_global = (PetscReal)A->M; 1525 info->columns_global = (PetscReal)A->N; 1526 info->rows_local = (PetscReal)A->m; 1527 info->columns_local = (PetscReal)A->N; 1528 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1529 info->fill_ratio_needed = 0; 1530 info->factor_mallocs = 0; 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1536 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1537 { 1538 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 switch (op) { 1543 case MAT_NO_NEW_NONZERO_LOCATIONS: 1544 case MAT_YES_NEW_NONZERO_LOCATIONS: 1545 case MAT_COLUMNS_UNSORTED: 1546 case MAT_COLUMNS_SORTED: 1547 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1548 case MAT_KEEP_ZEROED_ROWS: 1549 case MAT_NEW_NONZERO_LOCATION_ERR: 1550 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1551 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1552 break; 1553 case MAT_ROW_ORIENTED: 1554 a->roworiented = PETSC_TRUE; 1555 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1556 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1557 break; 1558 case MAT_ROWS_SORTED: 1559 case MAT_ROWS_UNSORTED: 1560 case MAT_YES_NEW_DIAGONALS: 1561 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1562 break; 1563 case MAT_COLUMN_ORIENTED: 1564 a->roworiented = PETSC_FALSE; 1565 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1566 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1567 break; 1568 case MAT_IGNORE_OFF_PROC_ENTRIES: 1569 a->donotstash = PETSC_TRUE; 1570 break; 1571 case MAT_NO_NEW_DIAGONALS: 1572 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1573 case MAT_USE_HASH_TABLE: 1574 a->ht_flag = PETSC_TRUE; 1575 break; 1576 case MAT_SYMMETRIC: 1577 case MAT_STRUCTURALLY_SYMMETRIC: 1578 case MAT_HERMITIAN: 1579 case MAT_SYMMETRY_ETERNAL: 1580 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1581 break; 1582 case MAT_NOT_SYMMETRIC: 1583 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1584 case MAT_NOT_HERMITIAN: 1585 case MAT_NOT_SYMMETRY_ETERNAL: 1586 break; 1587 default: 1588 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1595 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1596 { 1597 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1598 Mat_SeqBAIJ *Aloc; 1599 Mat B; 1600 PetscErrorCode ierr; 1601 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1602 PetscInt bs=A->bs,mbs=baij->mbs; 1603 MatScalar *a; 1604 1605 PetscFunctionBegin; 1606 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1607 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1608 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1609 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1610 1611 /* copy over the A part */ 1612 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1613 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1614 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1615 1616 for (i=0; i<mbs; i++) { 1617 rvals[0] = bs*(baij->rstart + i); 1618 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1619 for (j=ai[i]; j<ai[i+1]; j++) { 1620 col = (baij->cstart+aj[j])*bs; 1621 for (k=0; k<bs; k++) { 1622 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1623 col++; a += bs; 1624 } 1625 } 1626 } 1627 /* copy over the B part */ 1628 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1629 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1630 for (i=0; i<mbs; i++) { 1631 rvals[0] = bs*(baij->rstart + i); 1632 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1633 for (j=ai[i]; j<ai[i+1]; j++) { 1634 col = baij->garray[aj[j]]*bs; 1635 for (k=0; k<bs; k++) { 1636 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1637 col++; a += bs; 1638 } 1639 } 1640 } 1641 ierr = PetscFree(rvals);CHKERRQ(ierr); 1642 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 1645 if (matout) { 1646 *matout = B; 1647 } else { 1648 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1649 } 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1655 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1656 { 1657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1658 Mat a = baij->A,b = baij->B; 1659 PetscErrorCode ierr; 1660 PetscInt s1,s2,s3; 1661 1662 PetscFunctionBegin; 1663 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1664 if (rr) { 1665 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1666 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1667 /* Overlap communication with computation. */ 1668 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1669 } 1670 if (ll) { 1671 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1672 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1673 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1674 } 1675 /* scale the diagonal block */ 1676 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1677 1678 if (rr) { 1679 /* Do a scatter end and then right scale the off-diagonal block */ 1680 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1681 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1682 } 1683 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1689 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1690 { 1691 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1692 PetscErrorCode ierr; 1693 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1694 PetscInt i,N,*rows,*owners = l->rowners; 1695 PetscInt *nprocs,j,idx,nsends,row; 1696 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1697 PetscInt *rvalues,tag = A->tag,count,base,slen,*source; 1698 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1699 MPI_Comm comm = A->comm; 1700 MPI_Request *send_waits,*recv_waits; 1701 MPI_Status recv_status,*send_status; 1702 IS istmp; 1703 PetscTruth found; 1704 1705 PetscFunctionBegin; 1706 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1707 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1708 1709 /* first count number of contributors to each processor */ 1710 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1711 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1712 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1713 for (i=0; i<N; i++) { 1714 idx = rows[i]; 1715 found = PETSC_FALSE; 1716 for (j=0; j<size; j++) { 1717 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1718 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1719 } 1720 } 1721 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1722 } 1723 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1724 1725 /* inform other processors of number of messages and max length*/ 1726 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1727 1728 /* post receives: */ 1729 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1730 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1731 for (i=0; i<nrecvs; i++) { 1732 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1733 } 1734 1735 /* do sends: 1736 1) starts[i] gives the starting index in svalues for stuff going to 1737 the ith processor 1738 */ 1739 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1740 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1741 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1742 starts[0] = 0; 1743 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1744 for (i=0; i<N; i++) { 1745 svalues[starts[owner[i]]++] = rows[i]; 1746 } 1747 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1748 1749 starts[0] = 0; 1750 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1751 count = 0; 1752 for (i=0; i<size; i++) { 1753 if (nprocs[2*i+1]) { 1754 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1755 } 1756 } 1757 ierr = PetscFree(starts);CHKERRQ(ierr); 1758 1759 base = owners[rank]*bs; 1760 1761 /* wait on receives */ 1762 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1763 source = lens + nrecvs; 1764 count = nrecvs; slen = 0; 1765 while (count) { 1766 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1767 /* unpack receives into our local space */ 1768 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1769 source[imdex] = recv_status.MPI_SOURCE; 1770 lens[imdex] = n; 1771 slen += n; 1772 count--; 1773 } 1774 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1775 1776 /* move the data into the send scatter */ 1777 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1778 count = 0; 1779 for (i=0; i<nrecvs; i++) { 1780 values = rvalues + i*nmax; 1781 for (j=0; j<lens[i]; j++) { 1782 lrows[count++] = values[j] - base; 1783 } 1784 } 1785 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1786 ierr = PetscFree(lens);CHKERRQ(ierr); 1787 ierr = PetscFree(owner);CHKERRQ(ierr); 1788 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1789 1790 /* actually zap the local rows */ 1791 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1792 PetscLogObjectParent(A,istmp); 1793 1794 /* 1795 Zero the required rows. If the "diagonal block" of the matrix 1796 is square and the user wishes to set the diagonal we use seperate 1797 code so that MatSetValues() is not called for each diagonal allocating 1798 new memory, thus calling lots of mallocs and slowing things down. 1799 1800 Contributed by: Mathew Knepley 1801 */ 1802 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1803 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1804 if (diag && (l->A->M == l->A->N)) { 1805 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1806 } else if (diag) { 1807 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1808 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1809 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1810 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1811 } 1812 for (i=0; i<slen; i++) { 1813 row = lrows[i] + rstart_bs; 1814 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1815 } 1816 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1817 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1818 } else { 1819 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1820 } 1821 1822 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1823 ierr = PetscFree(lrows);CHKERRQ(ierr); 1824 1825 /* wait on sends */ 1826 if (nsends) { 1827 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1828 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1829 ierr = PetscFree(send_status);CHKERRQ(ierr); 1830 } 1831 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1832 ierr = PetscFree(svalues);CHKERRQ(ierr); 1833 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1839 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1840 { 1841 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1842 MPI_Comm comm = A->comm; 1843 static PetscTruth called = PETSC_FALSE; 1844 PetscErrorCode ierr; 1845 1846 PetscFunctionBegin; 1847 if (!a->rank) { 1848 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1849 } 1850 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1851 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1852 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1858 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1859 { 1860 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1861 PetscErrorCode ierr; 1862 1863 PetscFunctionBegin; 1864 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1865 PetscFunctionReturn(0); 1866 } 1867 1868 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatEqual_MPIBAIJ" 1872 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1873 { 1874 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1875 Mat a,b,c,d; 1876 PetscTruth flg; 1877 PetscErrorCode ierr; 1878 1879 PetscFunctionBegin; 1880 a = matA->A; b = matA->B; 1881 c = matB->A; d = matB->B; 1882 1883 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1884 if (flg == PETSC_TRUE) { 1885 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1886 } 1887 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1894 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1895 { 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 /* -------------------------------------------------------------------*/ 1904 static struct _MatOps MatOps_Values = { 1905 MatSetValues_MPIBAIJ, 1906 MatGetRow_MPIBAIJ, 1907 MatRestoreRow_MPIBAIJ, 1908 MatMult_MPIBAIJ, 1909 /* 4*/ MatMultAdd_MPIBAIJ, 1910 MatMultTranspose_MPIBAIJ, 1911 MatMultTransposeAdd_MPIBAIJ, 1912 0, 1913 0, 1914 0, 1915 /*10*/ 0, 1916 0, 1917 0, 1918 0, 1919 MatTranspose_MPIBAIJ, 1920 /*15*/ MatGetInfo_MPIBAIJ, 1921 MatEqual_MPIBAIJ, 1922 MatGetDiagonal_MPIBAIJ, 1923 MatDiagonalScale_MPIBAIJ, 1924 MatNorm_MPIBAIJ, 1925 /*20*/ MatAssemblyBegin_MPIBAIJ, 1926 MatAssemblyEnd_MPIBAIJ, 1927 0, 1928 MatSetOption_MPIBAIJ, 1929 MatZeroEntries_MPIBAIJ, 1930 /*25*/ MatZeroRows_MPIBAIJ, 1931 0, 1932 0, 1933 0, 1934 0, 1935 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1936 0, 1937 0, 1938 0, 1939 0, 1940 /*35*/ MatDuplicate_MPIBAIJ, 1941 0, 1942 0, 1943 0, 1944 0, 1945 /*40*/ 0, 1946 MatGetSubMatrices_MPIBAIJ, 1947 MatIncreaseOverlap_MPIBAIJ, 1948 MatGetValues_MPIBAIJ, 1949 0, 1950 /*45*/ MatPrintHelp_MPIBAIJ, 1951 MatScale_MPIBAIJ, 1952 0, 1953 0, 1954 0, 1955 /*50*/ 0, 1956 0, 1957 0, 1958 0, 1959 0, 1960 /*55*/ 0, 1961 0, 1962 MatSetUnfactored_MPIBAIJ, 1963 0, 1964 MatSetValuesBlocked_MPIBAIJ, 1965 /*60*/ 0, 1966 MatDestroy_MPIBAIJ, 1967 MatView_MPIBAIJ, 1968 MatGetPetscMaps_Petsc, 1969 0, 1970 /*65*/ 0, 1971 0, 1972 0, 1973 0, 1974 0, 1975 /*70*/ MatGetRowMax_MPIBAIJ, 1976 0, 1977 0, 1978 0, 1979 0, 1980 /*75*/ 0, 1981 0, 1982 0, 1983 0, 1984 0, 1985 /*80*/ 0, 1986 0, 1987 0, 1988 0, 1989 MatLoad_MPIBAIJ, 1990 /*85*/ 0, 1991 0, 1992 0, 1993 0, 1994 0, 1995 /*90*/ 0, 1996 0, 1997 0, 1998 0, 1999 0, 2000 /*95*/ 0, 2001 0, 2002 0, 2003 0}; 2004 2005 2006 EXTERN_C_BEGIN 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2009 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2010 { 2011 PetscFunctionBegin; 2012 *a = ((Mat_MPIBAIJ *)A->data)->A; 2013 *iscopy = PETSC_FALSE; 2014 PetscFunctionReturn(0); 2015 } 2016 EXTERN_C_END 2017 2018 EXTERN_C_BEGIN 2019 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 2020 EXTERN_C_END 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2024 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2025 { 2026 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2027 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2028 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2029 const PetscInt *JJ; 2030 PetscScalar *values; 2031 PetscErrorCode ierr; 2032 2033 PetscFunctionBegin; 2034 #if defined(PETSC_OPT_g) 2035 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2036 #endif 2037 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2038 o_nnz = d_nnz + m; 2039 2040 for (i=0; i<m; i++) { 2041 nnz = I[i+1]- I[i]; 2042 JJ = J + I[i]; 2043 nnz_max = PetscMax(nnz_max,nnz); 2044 #if defined(PETSC_OPT_g) 2045 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2046 #endif 2047 for (j=0; j<nnz; j++) { 2048 if (*JJ >= cstart) break; 2049 JJ++; 2050 } 2051 d = 0; 2052 for (; j<nnz; j++) { 2053 if (*JJ++ >= cend) break; 2054 d++; 2055 } 2056 d_nnz[i] = d; 2057 o_nnz[i] = nnz - d; 2058 } 2059 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2060 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2061 2062 if (v) values = (PetscScalar*)v; 2063 else { 2064 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2065 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2066 } 2067 2068 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2069 for (i=0; i<m; i++) { 2070 ii = i + rstart; 2071 nnz = I[i+1]- I[i]; 2072 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2073 } 2074 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2077 2078 if (!v) { 2079 ierr = PetscFree(values);CHKERRQ(ierr); 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2086 /*@C 2087 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2088 (the default parallel PETSc format). 2089 2090 Collective on MPI_Comm 2091 2092 Input Parameters: 2093 + A - the matrix 2094 . i - the indices into j for the start of each local row (starts with zero) 2095 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2096 - v - optional values in the matrix 2097 2098 Level: developer 2099 2100 .keywords: matrix, aij, compressed row, sparse, parallel 2101 2102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2103 @*/ 2104 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2105 { 2106 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2107 2108 PetscFunctionBegin; 2109 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2110 if (f) { 2111 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2112 } 2113 PetscFunctionReturn(0); 2114 } 2115 2116 EXTERN_C_BEGIN 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2119 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2120 { 2121 Mat_MPIBAIJ *b; 2122 PetscErrorCode ierr; 2123 PetscInt i; 2124 2125 PetscFunctionBegin; 2126 B->preallocated = PETSC_TRUE; 2127 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2128 2129 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2130 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2131 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2132 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2133 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2134 if (d_nnz) { 2135 for (i=0; i<B->m/bs; i++) { 2136 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2137 } 2138 } 2139 if (o_nnz) { 2140 for (i=0; i<B->m/bs; i++) { 2141 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2142 } 2143 } 2144 2145 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2146 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2147 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2148 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2149 2150 b = (Mat_MPIBAIJ*)B->data; 2151 B->bs = bs; 2152 b->bs2 = bs*bs; 2153 b->mbs = B->m/bs; 2154 b->nbs = B->n/bs; 2155 b->Mbs = B->M/bs; 2156 b->Nbs = B->N/bs; 2157 2158 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2159 b->rowners[0] = 0; 2160 for (i=2; i<=b->size; i++) { 2161 b->rowners[i] += b->rowners[i-1]; 2162 } 2163 b->rstart = b->rowners[b->rank]; 2164 b->rend = b->rowners[b->rank+1]; 2165 2166 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2167 b->cowners[0] = 0; 2168 for (i=2; i<=b->size; i++) { 2169 b->cowners[i] += b->cowners[i-1]; 2170 } 2171 b->cstart = b->cowners[b->rank]; 2172 b->cend = b->cowners[b->rank+1]; 2173 2174 for (i=0; i<=b->size; i++) { 2175 b->rowners_bs[i] = b->rowners[i]*bs; 2176 } 2177 b->rstart_bs = b->rstart*bs; 2178 b->rend_bs = b->rend*bs; 2179 b->cstart_bs = b->cstart*bs; 2180 b->cend_bs = b->cend*bs; 2181 2182 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2183 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2184 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2185 PetscLogObjectParent(B,b->A); 2186 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2187 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2188 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2189 PetscLogObjectParent(B,b->B); 2190 2191 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2192 2193 PetscFunctionReturn(0); 2194 } 2195 EXTERN_C_END 2196 2197 EXTERN_C_BEGIN 2198 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2199 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2200 EXTERN_C_END 2201 2202 /*MC 2203 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2204 2205 Options Database Keys: 2206 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2207 2208 Level: beginner 2209 2210 .seealso: MatCreateMPIBAIJ 2211 M*/ 2212 2213 EXTERN_C_BEGIN 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatCreate_MPIBAIJ" 2216 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2217 { 2218 Mat_MPIBAIJ *b; 2219 PetscErrorCode ierr; 2220 PetscTruth flg; 2221 2222 PetscFunctionBegin; 2223 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2224 B->data = (void*)b; 2225 2226 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2227 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2228 B->mapping = 0; 2229 B->factor = 0; 2230 B->assembled = PETSC_FALSE; 2231 2232 B->insertmode = NOT_SET_VALUES; 2233 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2234 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2235 2236 /* build local table of row and column ownerships */ 2237 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2238 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2239 b->cowners = b->rowners + b->size + 2; 2240 b->rowners_bs = b->cowners + b->size + 2; 2241 2242 /* build cache for off array entries formed */ 2243 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2244 b->donotstash = PETSC_FALSE; 2245 b->colmap = PETSC_NULL; 2246 b->garray = PETSC_NULL; 2247 b->roworiented = PETSC_TRUE; 2248 2249 #if defined(PETSC_USE_MAT_SINGLE) 2250 /* stuff for MatSetValues_XXX in single precision */ 2251 b->setvalueslen = 0; 2252 b->setvaluescopy = PETSC_NULL; 2253 #endif 2254 2255 /* stuff used in block assembly */ 2256 b->barray = 0; 2257 2258 /* stuff used for matrix vector multiply */ 2259 b->lvec = 0; 2260 b->Mvctx = 0; 2261 2262 /* stuff for MatGetRow() */ 2263 b->rowindices = 0; 2264 b->rowvalues = 0; 2265 b->getrowactive = PETSC_FALSE; 2266 2267 /* hash table stuff */ 2268 b->ht = 0; 2269 b->hd = 0; 2270 b->ht_size = 0; 2271 b->ht_flag = PETSC_FALSE; 2272 b->ht_fact = 0; 2273 b->ht_total_ct = 0; 2274 b->ht_insert_ct = 0; 2275 2276 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2277 if (flg) { 2278 PetscReal fact = 1.39; 2279 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2280 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2281 if (fact <= 1.0) fact = 1.39; 2282 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2283 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2284 } 2285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2286 "MatStoreValues_MPIBAIJ", 2287 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2288 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2289 "MatRetrieveValues_MPIBAIJ", 2290 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2291 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2292 "MatGetDiagonalBlock_MPIBAIJ", 2293 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2294 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2295 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2296 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2297 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2298 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2299 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2300 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2301 "MatDiagonalScaleLocal_MPIBAIJ", 2302 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2303 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2304 "MatSetHashTableFactor_MPIBAIJ", 2305 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 EXTERN_C_END 2309 2310 /*MC 2311 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2312 2313 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2314 and MATMPIBAIJ otherwise. 2315 2316 Options Database Keys: 2317 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2318 2319 Level: beginner 2320 2321 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2322 M*/ 2323 2324 EXTERN_C_BEGIN 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatCreate_BAIJ" 2327 PetscErrorCode MatCreate_BAIJ(Mat A) 2328 { 2329 PetscErrorCode ierr; 2330 PetscMPIInt size; 2331 2332 PetscFunctionBegin; 2333 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2334 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2335 if (size == 1) { 2336 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2337 } else { 2338 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2339 } 2340 PetscFunctionReturn(0); 2341 } 2342 EXTERN_C_END 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2346 /*@C 2347 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2348 (block compressed row). For good matrix assembly performance 2349 the user should preallocate the matrix storage by setting the parameters 2350 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2351 performance can be increased by more than a factor of 50. 2352 2353 Collective on Mat 2354 2355 Input Parameters: 2356 + A - the matrix 2357 . bs - size of blockk 2358 . d_nz - number of block nonzeros per block row in diagonal portion of local 2359 submatrix (same for all local rows) 2360 . d_nnz - array containing the number of block nonzeros in the various block rows 2361 of the in diagonal portion of the local (possibly different for each block 2362 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2363 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2364 submatrix (same for all local rows). 2365 - o_nnz - array containing the number of nonzeros in the various block rows of the 2366 off-diagonal portion of the local submatrix (possibly different for 2367 each block row) or PETSC_NULL. 2368 2369 If the *_nnz parameter is given then the *_nz parameter is ignored 2370 2371 Options Database Keys: 2372 . -mat_no_unroll - uses code that does not unroll the loops in the 2373 block calculations (much slower) 2374 . -mat_block_size - size of the blocks to use 2375 2376 Notes: 2377 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2378 than it must be used on all processors that share the object for that argument. 2379 2380 Storage Information: 2381 For a square global matrix we define each processor's diagonal portion 2382 to be its local rows and the corresponding columns (a square submatrix); 2383 each processor's off-diagonal portion encompasses the remainder of the 2384 local matrix (a rectangular submatrix). 2385 2386 The user can specify preallocated storage for the diagonal part of 2387 the local submatrix with either d_nz or d_nnz (not both). Set 2388 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2389 memory allocation. Likewise, specify preallocated storage for the 2390 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2391 2392 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2393 the figure below we depict these three local rows and all columns (0-11). 2394 2395 .vb 2396 0 1 2 3 4 5 6 7 8 9 10 11 2397 ------------------- 2398 row 3 | o o o d d d o o o o o o 2399 row 4 | o o o d d d o o o o o o 2400 row 5 | o o o d d d o o o o o o 2401 ------------------- 2402 .ve 2403 2404 Thus, any entries in the d locations are stored in the d (diagonal) 2405 submatrix, and any entries in the o locations are stored in the 2406 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2407 stored simply in the MATSEQBAIJ format for compressed row storage. 2408 2409 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2410 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2411 In general, for PDE problems in which most nonzeros are near the diagonal, 2412 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2413 or you will get TERRIBLE performance; see the users' manual chapter on 2414 matrices. 2415 2416 Level: intermediate 2417 2418 .keywords: matrix, block, aij, compressed row, sparse, parallel 2419 2420 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2421 @*/ 2422 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2423 { 2424 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2425 2426 PetscFunctionBegin; 2427 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2428 if (f) { 2429 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2430 } 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatCreateMPIBAIJ" 2436 /*@C 2437 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2438 (block compressed row). For good matrix assembly performance 2439 the user should preallocate the matrix storage by setting the parameters 2440 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2441 performance can be increased by more than a factor of 50. 2442 2443 Collective on MPI_Comm 2444 2445 Input Parameters: 2446 + comm - MPI communicator 2447 . bs - size of blockk 2448 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2449 This value should be the same as the local size used in creating the 2450 y vector for the matrix-vector product y = Ax. 2451 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2452 This value should be the same as the local size used in creating the 2453 x vector for the matrix-vector product y = Ax. 2454 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2455 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2456 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2457 submatrix (same for all local rows) 2458 . d_nnz - array containing the number of nonzero blocks in the various block rows 2459 of the in diagonal portion of the local (possibly different for each block 2460 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2461 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2462 submatrix (same for all local rows). 2463 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2464 off-diagonal portion of the local submatrix (possibly different for 2465 each block row) or PETSC_NULL. 2466 2467 Output Parameter: 2468 . A - the matrix 2469 2470 Options Database Keys: 2471 . -mat_no_unroll - uses code that does not unroll the loops in the 2472 block calculations (much slower) 2473 . -mat_block_size - size of the blocks to use 2474 2475 Notes: 2476 If the *_nnz parameter is given then the *_nz parameter is ignored 2477 2478 A nonzero block is any block that as 1 or more nonzeros in it 2479 2480 The user MUST specify either the local or global matrix dimensions 2481 (possibly both). 2482 2483 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2484 than it must be used on all processors that share the object for that argument. 2485 2486 Storage Information: 2487 For a square global matrix we define each processor's diagonal portion 2488 to be its local rows and the corresponding columns (a square submatrix); 2489 each processor's off-diagonal portion encompasses the remainder of the 2490 local matrix (a rectangular submatrix). 2491 2492 The user can specify preallocated storage for the diagonal part of 2493 the local submatrix with either d_nz or d_nnz (not both). Set 2494 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2495 memory allocation. Likewise, specify preallocated storage for the 2496 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2497 2498 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2499 the figure below we depict these three local rows and all columns (0-11). 2500 2501 .vb 2502 0 1 2 3 4 5 6 7 8 9 10 11 2503 ------------------- 2504 row 3 | o o o d d d o o o o o o 2505 row 4 | o o o d d d o o o o o o 2506 row 5 | o o o d d d o o o o o o 2507 ------------------- 2508 .ve 2509 2510 Thus, any entries in the d locations are stored in the d (diagonal) 2511 submatrix, and any entries in the o locations are stored in the 2512 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2513 stored simply in the MATSEQBAIJ format for compressed row storage. 2514 2515 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2516 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2517 In general, for PDE problems in which most nonzeros are near the diagonal, 2518 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2519 or you will get TERRIBLE performance; see the users' manual chapter on 2520 matrices. 2521 2522 Level: intermediate 2523 2524 .keywords: matrix, block, aij, compressed row, sparse, parallel 2525 2526 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2527 @*/ 2528 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2529 { 2530 PetscErrorCode ierr; 2531 PetscMPIInt size; 2532 2533 PetscFunctionBegin; 2534 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2535 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2536 if (size > 1) { 2537 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2538 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2539 } else { 2540 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2541 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2542 } 2543 PetscFunctionReturn(0); 2544 } 2545 2546 #undef __FUNCT__ 2547 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2548 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2549 { 2550 Mat mat; 2551 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2552 PetscErrorCode ierr; 2553 PetscInt len=0; 2554 2555 PetscFunctionBegin; 2556 *newmat = 0; 2557 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2558 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2559 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2560 2561 mat->factor = matin->factor; 2562 mat->preallocated = PETSC_TRUE; 2563 mat->assembled = PETSC_TRUE; 2564 mat->insertmode = NOT_SET_VALUES; 2565 2566 a = (Mat_MPIBAIJ*)mat->data; 2567 mat->bs = matin->bs; 2568 a->bs2 = oldmat->bs2; 2569 a->mbs = oldmat->mbs; 2570 a->nbs = oldmat->nbs; 2571 a->Mbs = oldmat->Mbs; 2572 a->Nbs = oldmat->Nbs; 2573 2574 a->rstart = oldmat->rstart; 2575 a->rend = oldmat->rend; 2576 a->cstart = oldmat->cstart; 2577 a->cend = oldmat->cend; 2578 a->size = oldmat->size; 2579 a->rank = oldmat->rank; 2580 a->donotstash = oldmat->donotstash; 2581 a->roworiented = oldmat->roworiented; 2582 a->rowindices = 0; 2583 a->rowvalues = 0; 2584 a->getrowactive = PETSC_FALSE; 2585 a->barray = 0; 2586 a->rstart_bs = oldmat->rstart_bs; 2587 a->rend_bs = oldmat->rend_bs; 2588 a->cstart_bs = oldmat->cstart_bs; 2589 a->cend_bs = oldmat->cend_bs; 2590 2591 /* hash table stuff */ 2592 a->ht = 0; 2593 a->hd = 0; 2594 a->ht_size = 0; 2595 a->ht_flag = oldmat->ht_flag; 2596 a->ht_fact = oldmat->ht_fact; 2597 a->ht_total_ct = 0; 2598 a->ht_insert_ct = 0; 2599 2600 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2601 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2602 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2603 if (oldmat->colmap) { 2604 #if defined (PETSC_USE_CTABLE) 2605 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2606 #else 2607 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2608 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt)); 2609 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2610 #endif 2611 } else a->colmap = 0; 2612 2613 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2614 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2615 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 2616 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2617 } else a->garray = 0; 2618 2619 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2620 PetscLogObjectParent(mat,a->lvec); 2621 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2622 PetscLogObjectParent(mat,a->Mvctx); 2623 2624 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2625 PetscLogObjectParent(mat,a->A); 2626 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2627 PetscLogObjectParent(mat,a->B); 2628 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2629 *newmat = mat; 2630 2631 PetscFunctionReturn(0); 2632 } 2633 2634 #include "petscsys.h" 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "MatLoad_MPIBAIJ" 2638 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2639 { 2640 Mat A; 2641 PetscErrorCode ierr; 2642 int fd; 2643 PetscInt i,nz,j,rstart,rend; 2644 PetscScalar *vals,*buf; 2645 MPI_Comm comm = ((PetscObject)viewer)->comm; 2646 MPI_Status status; 2647 PetscMPIInt rank,size,maxnz; 2648 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2649 PetscInt *locrowlens,*procsnz = 0,*browners; 2650 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows; 2651 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2652 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2653 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2654 2655 PetscFunctionBegin; 2656 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2657 2658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2659 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2660 if (!rank) { 2661 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2662 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2663 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2664 } 2665 2666 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2667 M = header[1]; N = header[2]; 2668 2669 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2670 2671 /* 2672 This code adds extra rows to make sure the number of rows is 2673 divisible by the blocksize 2674 */ 2675 Mbs = M/bs; 2676 extra_rows = bs - M + bs*Mbs; 2677 if (extra_rows == bs) extra_rows = 0; 2678 else Mbs++; 2679 if (extra_rows && !rank) { 2680 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2681 } 2682 2683 /* determine ownership of all rows */ 2684 mbs = Mbs/size + ((Mbs % size) > rank); 2685 m = mbs*bs; 2686 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2687 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2688 rowners[0] = 0; 2689 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2690 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2691 rstart = rowners[rank]; 2692 rend = rowners[rank+1]; 2693 2694 /* distribute row lengths to all processors */ 2695 ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2696 if (!rank) { 2697 mend = m; 2698 if (size == 1) mend = mend - extra_rows; 2699 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2700 for (j=mend; j<m; j++) locrowlens[j] = 1; 2701 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2702 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2703 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2704 for (j=0; j<m; j++) { 2705 procsnz[0] += locrowlens[j]; 2706 } 2707 for (i=1; i<size; i++) { 2708 mend = browners[i+1] - browners[i]; 2709 if (i == size-1) mend = mend - extra_rows; 2710 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2711 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2712 /* calculate the number of nonzeros on each processor */ 2713 for (j=0; j<browners[i+1]-browners[i]; j++) { 2714 procsnz[i] += rowlengths[j]; 2715 } 2716 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2717 } 2718 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2719 } else { 2720 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2721 } 2722 2723 if (!rank) { 2724 /* determine max buffer needed and allocate it */ 2725 maxnz = procsnz[0]; 2726 for (i=1; i<size; i++) { 2727 maxnz = PetscMax(maxnz,procsnz[i]); 2728 } 2729 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2730 2731 /* read in my part of the matrix column indices */ 2732 nz = procsnz[0]; 2733 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2734 mycols = ibuf; 2735 if (size == 1) nz -= extra_rows; 2736 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2737 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2738 2739 /* read in every ones (except the last) and ship off */ 2740 for (i=1; i<size-1; i++) { 2741 nz = procsnz[i]; 2742 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2743 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2744 } 2745 /* read in the stuff for the last proc */ 2746 if (size != 1) { 2747 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2748 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2749 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2750 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2751 } 2752 ierr = PetscFree(cols);CHKERRQ(ierr); 2753 } else { 2754 /* determine buffer space needed for message */ 2755 nz = 0; 2756 for (i=0; i<m; i++) { 2757 nz += locrowlens[i]; 2758 } 2759 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2760 mycols = ibuf; 2761 /* receive message of column indices*/ 2762 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2763 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2764 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2765 } 2766 2767 /* loop over local rows, determining number of off diagonal entries */ 2768 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2769 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2770 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2771 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2772 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2773 rowcount = 0; nzcount = 0; 2774 for (i=0; i<mbs; i++) { 2775 dcount = 0; 2776 odcount = 0; 2777 for (j=0; j<bs; j++) { 2778 kmax = locrowlens[rowcount]; 2779 for (k=0; k<kmax; k++) { 2780 tmp = mycols[nzcount++]/bs; 2781 if (!mask[tmp]) { 2782 mask[tmp] = 1; 2783 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2784 else masked1[dcount++] = tmp; 2785 } 2786 } 2787 rowcount++; 2788 } 2789 2790 dlens[i] = dcount; 2791 odlens[i] = odcount; 2792 2793 /* zero out the mask elements we set */ 2794 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2795 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2796 } 2797 2798 /* create our matrix */ 2799 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2800 ierr = MatSetType(A,type);CHKERRQ(ierr) 2801 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2802 2803 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2804 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2805 2806 if (!rank) { 2807 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2808 /* read in my part of the matrix numerical values */ 2809 nz = procsnz[0]; 2810 vals = buf; 2811 mycols = ibuf; 2812 if (size == 1) nz -= extra_rows; 2813 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2814 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2815 2816 /* insert into matrix */ 2817 jj = rstart*bs; 2818 for (i=0; i<m; i++) { 2819 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2820 mycols += locrowlens[i]; 2821 vals += locrowlens[i]; 2822 jj++; 2823 } 2824 /* read in other processors (except the last one) and ship out */ 2825 for (i=1; i<size-1; i++) { 2826 nz = procsnz[i]; 2827 vals = buf; 2828 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2829 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2830 } 2831 /* the last proc */ 2832 if (size != 1){ 2833 nz = procsnz[i] - extra_rows; 2834 vals = buf; 2835 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2836 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2837 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2838 } 2839 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2840 } else { 2841 /* receive numeric values */ 2842 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2843 2844 /* receive message of values*/ 2845 vals = buf; 2846 mycols = ibuf; 2847 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2848 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2849 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2850 2851 /* insert into matrix */ 2852 jj = rstart*bs; 2853 for (i=0; i<m; i++) { 2854 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2855 mycols += locrowlens[i]; 2856 vals += locrowlens[i]; 2857 jj++; 2858 } 2859 } 2860 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2861 ierr = PetscFree(buf);CHKERRQ(ierr); 2862 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2863 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2864 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2865 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2866 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2868 2869 *newmat = A; 2870 PetscFunctionReturn(0); 2871 } 2872 2873 #undef __FUNCT__ 2874 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2875 /*@ 2876 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2877 2878 Input Parameters: 2879 . mat - the matrix 2880 . fact - factor 2881 2882 Collective on Mat 2883 2884 Level: advanced 2885 2886 Notes: 2887 This can also be set by the command line option: -mat_use_hash_table fact 2888 2889 .keywords: matrix, hashtable, factor, HT 2890 2891 .seealso: MatSetOption() 2892 @*/ 2893 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2894 { 2895 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2896 2897 PetscFunctionBegin; 2898 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2899 if (f) { 2900 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2901 } 2902 PetscFunctionReturn(0); 2903 } 2904 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2907 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2908 { 2909 Mat_MPIBAIJ *baij; 2910 2911 PetscFunctionBegin; 2912 baij = (Mat_MPIBAIJ*)mat->data; 2913 baij->ht_fact = fact; 2914 PetscFunctionReturn(0); 2915 } 2916 2917 #undef __FUNCT__ 2918 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2919 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2920 { 2921 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2922 PetscFunctionBegin; 2923 *Ad = a->A; 2924 *Ao = a->B; 2925 *colmap = a->garray; 2926 PetscFunctionReturn(0); 2927 } 2928