1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ 
"MatStoreValues_MPIBAIJ" 70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 PetscErrorCode ierr; 110 PetscInt nbs = B->nbs,i,bs=mat->bs; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = 
#define CHUNKSIZE 10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt  new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr);  \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}
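/*
   Illustration (simplified sketch, not used by the code): the insertion macros locate
   the block column within one block row by a short bisection followed by a linear
   scan, then either update the existing bs x bs block or shift the tail of the row up
   by one block to make room.  Stripped of the reallocation logic it amounts to:

       PetscInt  brow = row/bs, bcol = col/bs, ridx = row%bs, cidx = col%bs;
       PetscInt  lo = 0, hi = ailen[brow], i;
       PetscInt  *rp = aj + ai[brow];         (block column indices of this row)
       MatScalar *ap = aa + bs2*ai[brow];     (the bs2 values of each block)
       while (hi - lo > 3) { PetscInt t = (lo+hi)/2; if (rp[t] > bcol) hi = t; else lo = t; }
       for (i=lo; i<hi && rp[i] <= bcol; i++) {
         if (rp[i] == bcol) {                 (block already present)
           ap[bs2*i + bs*cidx + ridx] += value;   (or = value for INSERT_VALUES)
           goto done;
         }
       }
       (otherwise shift rp[i..] and ap[bs2*i..] up by one block and insert at position i)

   Note the column-major layout inside each block: entry (ridx,cidx) of block i lives
   at ap[bs2*i + bs*cidx + ridx].
*/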
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr);  \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 777 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779 row = idxm[i] - bsrstart; 780 for (j=0; j<n; j++) { 781 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 782 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783 if (idxn[j] >= bscstart && idxn[j] < bscend){ 784 col = idxn[j] - bscstart; 785 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } else { 787 if (!baij->colmap) { 788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789 } 790 #if defined (PETSC_USE_CTABLE) 791 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792 data --; 793 #else 794 data = baij->colmap[idxn[j]/bs]-1; 795 #endif 796 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797 else { 798 col = data + idxn[j]%bs; 799 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800 } 801 } 802 } 803 } else { 804 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatNorm_MPIBAIJ" 812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813 { 814 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816 PetscErrorCode ierr; 817 PetscInt i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col; 818 PetscReal sum = 0.0; 819 MatScalar *v; 820 821 PetscFunctionBegin; 822 if (baij->size == 1) { 823 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 v = amat->a; 827 nz = amat->nz*bs2; 828 for (i=0; i<nz; i++) { 829 #if defined(PETSC_USE_COMPLEX) 830 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 831 #else 832 sum += (*v)*(*v); v++; 833 #endif 834 } 835 v = bmat->a; 836 nz = bmat->nz*bs2; 837 for (i=0; i<nz; i++) { 838 #if defined(PETSC_USE_COMPLEX) 839 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 840 #else 841 sum += (*v)*(*v); v++; 842 #endif 843 } 844 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 845 *nrm = sqrt(*nrm); 846 } else if (type == NORM_1) { /* max column sum */ 847 PetscReal *tmp,*tmp2; 848 PetscInt *jj,*garray=baij->garray,cstart=baij->cstart; 849 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 850 tmp2 = tmp + mat->N; 851 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 852 v = amat->a; jj = amat->j; 853 for (i=0; i<amat->nz; i++) { 854 for (j=0; j<bs; j++){ 855 col = bs*(cstart + *jj) + j; /* column index */ 856 for (row=0; row<bs; row++){ 857 tmp[col] += PetscAbsScalar(*v); v++; 858 } 859 } 860 jj++; 861 } 862 v = bmat->a; jj = bmat->j; 863 for (i=0; i<bmat->nz; i++) { 864 for (j=0; j<bs; j++){ 865 col = bs*garray[*jj] + j; 866 for (row=0; row<bs; row++){ 867 tmp[col] += PetscAbsScalar(*v); v++; 868 } 869 } 870 jj++; 871 } 872 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 873 *nrm = 0.0; 874 for (j=0; j<mat->N; j++) { 875 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 876 } 877 ierr = PetscFree(tmp);CHKERRQ(ierr); 878 } else if (type == NORM_INFINITY) { /* max row sum */ 879 PetscReal sums[bs]; 880 sum = 0.0; 881 for (j=0; j<amat->mbs; j++) { 882 for (row=0; row<bs; row++) sums[row] = 0.0; 883 v = amat->a + bs2*amat->i[j]; 884 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
      ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->N;
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal sums[bs];
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills in the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
  PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_DEBUG)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_DEBUG)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
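/*
   Illustration (not part of the implementation): the table uses multiplicative
   hashing with the golden-ratio constant HASH_KEY and linear probing.  A block
   entry (brow,bcol) is keyed by key = brow*Nbs + bcol + 1 (so 0 can mean "empty")
   and hashed to

       h1 = (PetscInt)(size * frac(key * 0.6180339887))

   e.g. key = 7, size = 100 gives frac(4.326...) = 0.326..., hence slot 32; on a
   collision the search simply continues at the next slot (mod size).

   The table is only built when the user has requested it, e.g. (a usage sketch)

       ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);

   after which the setvalues operations are replaced by the _HT versions in
   MatAssemblyEnd_MPIBAIJ() below.
*/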
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
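/*
   Usage sketch (not part of the implementation): values set for rows owned by another
   process are not applied immediately; MatSetValues_MPIBAIJ() puts them in mat->stash
   (or mat->bstash for blocked insertion), they are communicated by the MatStashScatter
   calls above, and unpacked in MatAssemblyEnd_MPIBAIJ() below.  A typical assembly
   loop therefore looks like

       for (i=rstart; i<rend; i++) {        (may also set rows owned elsewhere)
         ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   If a process never sets off-process entries it can avoid the stash entirely with
   MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES) (the donotstash flag checked above).
*/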
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
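/*
   Usage sketch (assuming the standard viewer interface of this PETSc generation):
   the detailed branches above are selected through the viewer format, e.g.

       ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   prints only the block size, while PETSC_VIEWER_ASCII_INFO_DETAIL also prints the
   per-process nonzero counts and the scatter context.
*/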
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
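/*
   Illustration (not part of the implementation): with the storage split used here the
   parallel product is

       y_local = A_diag * x_local + B_off * x_ghost

   where x_ghost = a->lvec is filled from the other processes by the VecScatter
   a->Mvctx.  MatMult_MPIBAIJ() above starts the scatter, multiplies by the diagonal
   block while the messages are in flight, completes the scatter and then adds the
   off-diagonal contribution, so communication overlaps the local computation.
*/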
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
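/*
   Illustration (not part of the implementation): MatGetRow_MPIBAIJ() above returns the
   row in global column order by interleaving the two local parts: first the entries of
   baij->B whose global column (recovered through cmap = garray) lies before the
   diagonal block, then all entries of baij->A shifted by cstart*bs, then the remaining
   entries of baij->B.  E.g. with bs = 1, cstart = 2, cend = 4, garray = {0,5}, a row
   stored as A = {2,3} (local columns 0,1) and B = {1,9} (local columns 0,1) is
   returned with columns {0,2,3,5} and values {1,2,3,9}.
*/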
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1570 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1571 isend[3] += info->memory; isend[4] += info->mallocs; 1572 if (flag == MAT_LOCAL) { 1573 info->nz_used = isend[0]; 1574 info->nz_allocated = isend[1]; 1575 info->nz_unneeded = isend[2]; 1576 info->memory = isend[3]; 1577 info->mallocs = isend[4]; 1578 } else if (flag == MAT_GLOBAL_MAX) { 1579 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1580 info->nz_used = irecv[0]; 1581 info->nz_allocated = irecv[1]; 1582 info->nz_unneeded = irecv[2]; 1583 info->memory = irecv[3]; 1584 info->mallocs = irecv[4]; 1585 } else if (flag == MAT_GLOBAL_SUM) { 1586 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1587 info->nz_used = irecv[0]; 1588 info->nz_allocated = irecv[1]; 1589 info->nz_unneeded = irecv[2]; 1590 info->memory = irecv[3]; 1591 info->mallocs = irecv[4]; 1592 } else { 1593 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1594 } 1595 info->rows_global = (PetscReal)A->M; 1596 info->columns_global = (PetscReal)A->N; 1597 info->rows_local = (PetscReal)A->m; 1598 info->columns_local = (PetscReal)A->N; 1599 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1600 info->fill_ratio_needed = 0; 1601 info->factor_mallocs = 0; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1607 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1608 { 1609 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 switch (op) { 1614 case MAT_NO_NEW_NONZERO_LOCATIONS: 1615 case MAT_YES_NEW_NONZERO_LOCATIONS: 1616 case MAT_COLUMNS_UNSORTED: 1617 case MAT_COLUMNS_SORTED: 1618 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1619 case MAT_KEEP_ZEROED_ROWS: 1620 case MAT_NEW_NONZERO_LOCATION_ERR: 1621 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1622 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1623 break; 1624 case MAT_ROW_ORIENTED: 1625 a->roworiented = PETSC_TRUE; 1626 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1627 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1628 break; 1629 case MAT_ROWS_SORTED: 1630 case MAT_ROWS_UNSORTED: 1631 case MAT_YES_NEW_DIAGONALS: 1632 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1633 break; 1634 case MAT_COLUMN_ORIENTED: 1635 a->roworiented = PETSC_FALSE; 1636 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1637 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1638 break; 1639 case MAT_IGNORE_OFF_PROC_ENTRIES: 1640 a->donotstash = PETSC_TRUE; 1641 break; 1642 case MAT_NO_NEW_DIAGONALS: 1643 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1644 case MAT_USE_HASH_TABLE: 1645 a->ht_flag = PETSC_TRUE; 1646 break; 1647 case MAT_SYMMETRIC: 1648 case MAT_STRUCTURALLY_SYMMETRIC: 1649 case MAT_HERMITIAN: 1650 case MAT_SYMMETRY_ETERNAL: 1651 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1652 break; 1653 case MAT_NOT_SYMMETRIC: 1654 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1655 case MAT_NOT_HERMITIAN: 1656 case MAT_NOT_SYMMETRY_ETERNAL: 1657 break; 1658 default: 1659 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1666 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1667 { 1668 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1669 Mat_SeqBAIJ *Aloc; 1670 Mat B; 1671 PetscErrorCode ierr; 1672 PetscInt 
M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1673 PetscInt bs=A->bs,mbs=baij->mbs; 1674 MatScalar *a; 1675 1676 PetscFunctionBegin; 1677 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1678 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1679 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1680 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1681 1682 /* copy over the A part */ 1683 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1684 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1685 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1686 1687 for (i=0; i<mbs; i++) { 1688 rvals[0] = bs*(baij->rstart + i); 1689 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1690 for (j=ai[i]; j<ai[i+1]; j++) { 1691 col = (baij->cstart+aj[j])*bs; 1692 for (k=0; k<bs; k++) { 1693 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1694 col++; a += bs; 1695 } 1696 } 1697 } 1698 /* copy over the B part */ 1699 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1700 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1701 for (i=0; i<mbs; i++) { 1702 rvals[0] = bs*(baij->rstart + i); 1703 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1704 for (j=ai[i]; j<ai[i+1]; j++) { 1705 col = baij->garray[aj[j]]*bs; 1706 for (k=0; k<bs; k++) { 1707 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1708 col++; a += bs; 1709 } 1710 } 1711 } 1712 ierr = PetscFree(rvals);CHKERRQ(ierr); 1713 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1714 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1715 1716 if (matout) { 1717 *matout = B; 1718 } else { 1719 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1726 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1727 { 1728 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1729 Mat a = baij->A,b = baij->B; 1730 PetscErrorCode ierr; 1731 PetscInt s1,s2,s3; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1735 if (rr) { 1736 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1737 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1738 /* Overlap communication with computation. 
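The forward scatter of rr into baij->lvec is started immediately below; while those off-process values are in flight, the left scaling of the off-diagonal block B and the scaling of the diagonal block A proceed, and the scatter is completed before B is right-scaled with lvec.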
*/ 1739 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1740 } 1741 if (ll) { 1742 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1743 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1744 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1745 } 1746 /* scale the diagonal block */ 1747 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1748 1749 if (rr) { 1750 /* Do a scatter end and then right scale the off-diagonal block */ 1751 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1752 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1753 } 1754 1755 PetscFunctionReturn(0); 1756 } 1757 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1761 { 1762 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1763 PetscErrorCode ierr; 1764 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1765 PetscInt i,N,*rows,*owners = l->rowners; 1766 PetscInt *nprocs,j,idx,nsends,row; 1767 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1768 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1769 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1770 MPI_Comm comm = A->comm; 1771 MPI_Request *send_waits,*recv_waits; 1772 MPI_Status recv_status,*send_status; 1773 IS istmp; 1774 #if defined(PETSC_DEBUG) 1775 PetscTruth found = PETSC_FALSE; 1776 #endif 1777 1778 PetscFunctionBegin; 1779 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1780 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1781 1782 /* first count number of contributors to each processor */ 1783 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1784 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1785 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1786 j = 0; 1787 for (i=0; i<N; i++) { 1788 if (lastidx > (idx = rows[i])) j = 0; 1789 lastidx = idx; 1790 for (; j<size; j++) { 1791 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1792 nprocs[2*j]++; 1793 nprocs[2*j+1] = 1; 1794 owner[i] = j; 1795 #if defined(PETSC_DEBUG) 1796 found = PETSC_TRUE; 1797 #endif 1798 break; 1799 } 1800 } 1801 #if defined(PETSC_DEBUG) 1802 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1803 found = PETSC_FALSE; 1804 #endif 1805 } 1806 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1807 1808 /* inform other processors of number of messages and max length*/ 1809 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1810 1811 /* post receives: */ 1812 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1813 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1814 for (i=0; i<nrecvs; i++) { 1815 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1816 } 1817 1818 /* do sends: 1819 1) starts[i] gives the starting index in svalues for stuff going to 1820 the ith processor 1821 */ 1822 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1823 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1824 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1825 starts[0] = 0; 1826 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1827 for (i=0; i<N; i++) { 1828 svalues[starts[owner[i]]++] = rows[i]; 1829 } 1830 ierr = 
ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1831 1832 starts[0] = 0; 1833 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1834 count = 0; 1835 for (i=0; i<size; i++) { 1836 if (nprocs[2*i+1]) { 1837 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1838 } 1839 } 1840 ierr = PetscFree(starts);CHKERRQ(ierr); 1841 1842 base = owners[rank]*bs; 1843 1844 /* wait on receives */ 1845 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1846 source = lens + nrecvs; 1847 count = nrecvs; slen = 0; 1848 while (count) { 1849 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1850 /* unpack receives into our local space */ 1851 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1852 source[imdex] = recv_status.MPI_SOURCE; 1853 lens[imdex] = n; 1854 slen += n; 1855 count--; 1856 } 1857 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1858 1859 /* move the received row indices into the list of local rows */ 1860 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1861 count = 0; 1862 for (i=0; i<nrecvs; i++) { 1863 values = rvalues + i*nmax; 1864 for (j=0; j<lens[i]; j++) { 1865 lrows[count++] = values[j] - base; 1866 } 1867 } 1868 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1869 ierr = PetscFree(lens);CHKERRQ(ierr); 1870 ierr = PetscFree(owner);CHKERRQ(ierr); 1871 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1872 1873 /* actually zap the local rows */ 1874 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1875 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 1876 1877 /* 1878 Zero the required rows. If the "diagonal block" of the matrix 1879 is square and the user wishes to set the diagonal we use separate 1880 code so that MatSetValues() is not called for each diagonal entry, which would 1881 allocate new memory and trigger many mallocs, slowing things down.
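(In the rectangular case handled below, the diagonal entries are instead inserted one row at a time with MatSetValues(), which is why that path requires the matrix to allow new nonzero locations.)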
1882 1883 Contributed by: Mathew Knepley 1884 */ 1885 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1886 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1887 if (diag && (l->A->M == l->A->N)) { 1888 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1889 } else if (diag) { 1890 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1891 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1892 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1893 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1894 } 1895 for (i=0; i<slen; i++) { 1896 row = lrows[i] + rstart_bs; 1897 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1898 } 1899 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1900 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1901 } else { 1902 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1903 } 1904 1905 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1906 ierr = PetscFree(lrows);CHKERRQ(ierr); 1907 1908 /* wait on sends */ 1909 if (nsends) { 1910 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1911 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1912 ierr = PetscFree(send_status);CHKERRQ(ierr); 1913 } 1914 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1915 ierr = PetscFree(svalues);CHKERRQ(ierr); 1916 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1922 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1923 { 1924 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1925 MPI_Comm comm = A->comm; 1926 static PetscTruth called = PETSC_FALSE; 1927 PetscErrorCode ierr; 1928 1929 PetscFunctionBegin; 1930 if (!a->rank) { 1931 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1932 } 1933 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1934 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1935 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1941 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1942 { 1943 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatEqual_MPIBAIJ" 1955 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1956 { 1957 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1958 Mat a,b,c,d; 1959 PetscTruth flg; 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 a = matA->A; b = matA->B; 1964 c = matB->A; d = matB->B; 1965 1966 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1967 if (flg) { 1968 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1969 } 1970 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1971 PetscFunctionReturn(0); 1972 } 1973 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1977 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1978 { 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1983 
PetscFunctionReturn(0); 1984 } 1985 1986 /* -------------------------------------------------------------------*/ 1987 static struct _MatOps MatOps_Values = { 1988 MatSetValues_MPIBAIJ, 1989 MatGetRow_MPIBAIJ, 1990 MatRestoreRow_MPIBAIJ, 1991 MatMult_MPIBAIJ, 1992 /* 4*/ MatMultAdd_MPIBAIJ, 1993 MatMultTranspose_MPIBAIJ, 1994 MatMultTransposeAdd_MPIBAIJ, 1995 0, 1996 0, 1997 0, 1998 /*10*/ 0, 1999 0, 2000 0, 2001 0, 2002 MatTranspose_MPIBAIJ, 2003 /*15*/ MatGetInfo_MPIBAIJ, 2004 MatEqual_MPIBAIJ, 2005 MatGetDiagonal_MPIBAIJ, 2006 MatDiagonalScale_MPIBAIJ, 2007 MatNorm_MPIBAIJ, 2008 /*20*/ MatAssemblyBegin_MPIBAIJ, 2009 MatAssemblyEnd_MPIBAIJ, 2010 0, 2011 MatSetOption_MPIBAIJ, 2012 MatZeroEntries_MPIBAIJ, 2013 /*25*/ MatZeroRows_MPIBAIJ, 2014 0, 2015 0, 2016 0, 2017 0, 2018 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2019 0, 2020 0, 2021 0, 2022 0, 2023 /*35*/ MatDuplicate_MPIBAIJ, 2024 0, 2025 0, 2026 0, 2027 0, 2028 /*40*/ 0, 2029 MatGetSubMatrices_MPIBAIJ, 2030 MatIncreaseOverlap_MPIBAIJ, 2031 MatGetValues_MPIBAIJ, 2032 0, 2033 /*45*/ MatPrintHelp_MPIBAIJ, 2034 MatScale_MPIBAIJ, 2035 0, 2036 0, 2037 0, 2038 /*50*/ 0, 2039 0, 2040 0, 2041 0, 2042 0, 2043 /*55*/ 0, 2044 0, 2045 MatSetUnfactored_MPIBAIJ, 2046 0, 2047 MatSetValuesBlocked_MPIBAIJ, 2048 /*60*/ 0, 2049 MatDestroy_MPIBAIJ, 2050 MatView_MPIBAIJ, 2051 MatGetPetscMaps_Petsc, 2052 0, 2053 /*65*/ 0, 2054 0, 2055 0, 2056 0, 2057 0, 2058 /*70*/ MatGetRowMax_MPIBAIJ, 2059 0, 2060 0, 2061 0, 2062 0, 2063 /*75*/ 0, 2064 0, 2065 0, 2066 0, 2067 0, 2068 /*80*/ 0, 2069 0, 2070 0, 2071 0, 2072 MatLoad_MPIBAIJ, 2073 /*85*/ 0, 2074 0, 2075 0, 2076 0, 2077 0, 2078 /*90*/ 0, 2079 0, 2080 0, 2081 0, 2082 0, 2083 /*95*/ 0, 2084 0, 2085 0, 2086 0}; 2087 2088 2089 EXTERN_C_BEGIN 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2092 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2093 { 2094 PetscFunctionBegin; 2095 *a = ((Mat_MPIBAIJ *)A->data)->A; 2096 *iscopy = PETSC_FALSE; 2097 PetscFunctionReturn(0); 2098 } 2099 EXTERN_C_END 2100 2101 EXTERN_C_BEGIN 2102 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 2103 EXTERN_C_END 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2107 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2108 { 2109 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2110 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2111 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2112 const PetscInt *JJ; 2113 PetscScalar *values; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 #if defined(PETSC_OPT_g) 2118 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2119 #endif 2120 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2121 o_nnz = d_nnz + m; 2122 2123 for (i=0; i<m; i++) { 2124 nnz = I[i+1]- I[i]; 2125 JJ = J + I[i]; 2126 nnz_max = PetscMax(nnz_max,nnz); 2127 #if defined(PETSC_OPT_g) 2128 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2129 #endif 2130 for (j=0; j<nnz; j++) { 2131 if (*JJ >= cstart) break; 2132 JJ++; 2133 } 2134 d = 0; 2135 for (; j<nnz; j++) { 2136 if (*JJ++ >= cend) break; 2137 d++; 2138 } 2139 d_nnz[i] = d; 2140 o_nnz[i] = nnz - d; 2141 } 2142 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2143 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2144 2145 if (v) values = 
(PetscScalar*)v; 2146 else { 2147 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2148 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2149 } 2150 2151 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2152 for (i=0; i<m; i++) { 2153 ii = i + rstart; 2154 nnz = I[i+1]- I[i]; 2155 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2156 } 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2160 2161 if (!v) { 2162 ierr = PetscFree(values);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2169 /*@C 2170 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2171 (the default parallel PETSc format). 2172 2173 Collective on MPI_Comm 2174 2175 Input Parameters: 2176 + A - the matrix 2177 . i - the indices into j for the start of each local row (starts with zero) 2178 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2179 - v - optional values in the matrix 2180 2181 Level: developer 2182 2183 .keywords: matrix, aij, compressed row, sparse, parallel 2184 2185 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2186 @*/ 2187 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2188 { 2189 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2190 2191 PetscFunctionBegin; 2192 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2193 if (f) { 2194 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2202 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2203 { 2204 Mat_MPIBAIJ *b; 2205 PetscErrorCode ierr; 2206 PetscInt i; 2207 2208 PetscFunctionBegin; 2209 B->preallocated = PETSC_TRUE; 2210 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2211 2212 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2213 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2214 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2215 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2216 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2217 if (d_nnz) { 2218 for (i=0; i<B->m/bs; i++) { 2219 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2220 } 2221 } 2222 if (o_nnz) { 2223 for (i=0; i<B->m/bs; i++) { 2224 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2225 } 2226 } 2227 2228 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2229 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2230 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2231 ierr = 
PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2232 2233 b = (Mat_MPIBAIJ*)B->data; 2234 B->bs = bs; 2235 b->bs2 = bs*bs; 2236 b->mbs = B->m/bs; 2237 b->nbs = B->n/bs; 2238 b->Mbs = B->M/bs; 2239 b->Nbs = B->N/bs; 2240 2241 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2242 b->rowners[0] = 0; 2243 for (i=2; i<=b->size; i++) { 2244 b->rowners[i] += b->rowners[i-1]; 2245 } 2246 b->rstart = b->rowners[b->rank]; 2247 b->rend = b->rowners[b->rank+1]; 2248 2249 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2250 b->cowners[0] = 0; 2251 for (i=2; i<=b->size; i++) { 2252 b->cowners[i] += b->cowners[i-1]; 2253 } 2254 b->cstart = b->cowners[b->rank]; 2255 b->cend = b->cowners[b->rank+1]; 2256 2257 for (i=0; i<=b->size; i++) { 2258 b->rowners_bs[i] = b->rowners[i]*bs; 2259 } 2260 b->rstart_bs = b->rstart*bs; 2261 b->rend_bs = b->rend*bs; 2262 b->cstart_bs = b->cstart*bs; 2263 b->cend_bs = b->cend*bs; 2264 2265 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2266 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2267 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2268 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2269 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2270 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2271 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2272 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2273 2274 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2275 2276 PetscFunctionReturn(0); 2277 } 2278 EXTERN_C_END 2279 2280 EXTERN_C_BEGIN 2281 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2282 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2283 EXTERN_C_END 2284 2285 /*MC 2286 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2287 2288 Options Database Keys: 2289 . 
-mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2290 2291 Level: beginner 2292 2293 .seealso: MatCreateMPIBAIJ 2294 M*/ 2295 2296 EXTERN_C_BEGIN 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "MatCreate_MPIBAIJ" 2299 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2300 { 2301 Mat_MPIBAIJ *b; 2302 PetscErrorCode ierr; 2303 PetscTruth flg; 2304 2305 PetscFunctionBegin; 2306 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2307 B->data = (void*)b; 2308 2309 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2310 B->mapping = 0; 2311 B->factor = 0; 2312 B->assembled = PETSC_FALSE; 2313 2314 B->insertmode = NOT_SET_VALUES; 2315 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2316 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2317 2318 /* build local table of row and column ownerships */ 2319 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2320 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2321 b->cowners = b->rowners + b->size + 2; 2322 b->rowners_bs = b->cowners + b->size + 2; 2323 2324 /* build cache for off array entries formed */ 2325 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2326 b->donotstash = PETSC_FALSE; 2327 b->colmap = PETSC_NULL; 2328 b->garray = PETSC_NULL; 2329 b->roworiented = PETSC_TRUE; 2330 2331 #if defined(PETSC_USE_MAT_SINGLE) 2332 /* stuff for MatSetValues_XXX in single precision */ 2333 b->setvalueslen = 0; 2334 b->setvaluescopy = PETSC_NULL; 2335 #endif 2336 2337 /* stuff used in block assembly */ 2338 b->barray = 0; 2339 2340 /* stuff used for matrix vector multiply */ 2341 b->lvec = 0; 2342 b->Mvctx = 0; 2343 2344 /* stuff for MatGetRow() */ 2345 b->rowindices = 0; 2346 b->rowvalues = 0; 2347 b->getrowactive = PETSC_FALSE; 2348 2349 /* hash table stuff */ 2350 b->ht = 0; 2351 b->hd = 0; 2352 b->ht_size = 0; 2353 b->ht_flag = PETSC_FALSE; 2354 b->ht_fact = 0; 2355 b->ht_total_ct = 0; 2356 b->ht_insert_ct = 0; 2357 2358 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2359 if (flg) { 2360 PetscReal fact = 1.39; 2361 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2362 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2363 if (fact <= 1.0) fact = 1.39; 2364 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2365 ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 2366 } 2367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2368 "MatStoreValues_MPIBAIJ", 2369 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2371 "MatRetrieveValues_MPIBAIJ", 2372 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2374 "MatGetDiagonalBlock_MPIBAIJ", 2375 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2376 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2377 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2378 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2380 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2381 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2383 
"MatDiagonalScaleLocal_MPIBAIJ", 2384 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2386 "MatSetHashTableFactor_MPIBAIJ", 2387 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 EXTERN_C_END 2391 2392 /*MC 2393 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2394 2395 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2396 and MATMPIBAIJ otherwise. 2397 2398 Options Database Keys: 2399 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2400 2401 Level: beginner 2402 2403 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2404 M*/ 2405 2406 EXTERN_C_BEGIN 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatCreate_BAIJ" 2409 PetscErrorCode MatCreate_BAIJ(Mat A) 2410 { 2411 PetscErrorCode ierr; 2412 PetscMPIInt size; 2413 2414 PetscFunctionBegin; 2415 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2416 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2417 if (size == 1) { 2418 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2419 } else { 2420 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2421 } 2422 PetscFunctionReturn(0); 2423 } 2424 EXTERN_C_END 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2428 /*@C 2429 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2430 (block compressed row). For good matrix assembly performance 2431 the user should preallocate the matrix storage by setting the parameters 2432 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2433 performance can be increased by more than a factor of 50. 2434 2435 Collective on Mat 2436 2437 Input Parameters: 2438 + A - the matrix 2439 . bs - size of blockk 2440 . d_nz - number of block nonzeros per block row in diagonal portion of local 2441 submatrix (same for all local rows) 2442 . d_nnz - array containing the number of block nonzeros in the various block rows 2443 of the in diagonal portion of the local (possibly different for each block 2444 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2445 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2446 submatrix (same for all local rows). 2447 - o_nnz - array containing the number of nonzeros in the various block rows of the 2448 off-diagonal portion of the local submatrix (possibly different for 2449 each block row) or PETSC_NULL. 2450 2451 If the *_nnz parameter is given then the *_nz parameter is ignored 2452 2453 Options Database Keys: 2454 . -mat_no_unroll - uses code that does not unroll the loops in the 2455 block calculations (much slower) 2456 . -mat_block_size - size of the blocks to use 2457 2458 Notes: 2459 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2460 than it must be used on all processors that share the object for that argument. 2461 2462 Storage Information: 2463 For a square global matrix we define each processor's diagonal portion 2464 to be its local rows and the corresponding columns (a square submatrix); 2465 each processor's off-diagonal portion encompasses the remainder of the 2466 local matrix (a rectangular submatrix). 
2467 2468 The user can specify preallocated storage for the diagonal part of 2469 the local submatrix with either d_nz or d_nnz (not both). Set 2470 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2471 memory allocation. Likewise, specify preallocated storage for the 2472 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2473 2474 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2475 the figure below we depict these three local rows and all columns (0-11). 2476 2477 .vb 2478 0 1 2 3 4 5 6 7 8 9 10 11 2479 ------------------- 2480 row 3 | o o o d d d o o o o o o 2481 row 4 | o o o d d d o o o o o o 2482 row 5 | o o o d d d o o o o o o 2483 ------------------- 2484 .ve 2485 2486 Thus, any entries in the d locations are stored in the d (diagonal) 2487 submatrix, and any entries in the o locations are stored in the 2488 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2489 stored simply in the MATSEQBAIJ format for compressed row storage. 2490 2491 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2492 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2493 In general, for PDE problems in which most nonzeros are near the diagonal, 2494 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2495 or you will get TERRIBLE performance; see the users' manual chapter on 2496 matrices. 2497 2498 Level: intermediate 2499 2500 .keywords: matrix, block, aij, compressed row, sparse, parallel 2501 2502 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2503 @*/ 2504 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2505 { 2506 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2507 2508 PetscFunctionBegin; 2509 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2510 if (f) { 2511 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2512 } 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatCreateMPIBAIJ" 2518 /*@C 2519 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2520 (block compressed row). For good matrix assembly performance 2521 the user should preallocate the matrix storage by setting the parameters 2522 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2523 performance can be increased by more than a factor of 50. 2524 2525 Collective on MPI_Comm 2526 2527 Input Parameters: 2528 + comm - MPI communicator 2529 . bs - size of blockk 2530 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2531 This value should be the same as the local size used in creating the 2532 y vector for the matrix-vector product y = Ax. 2533 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2534 This value should be the same as the local size used in creating the 2535 x vector for the matrix-vector product y = Ax. 2536 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2537 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2538 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2539 submatrix (same for all local rows) 2540 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 2541 of the diagonal portion of the local submatrix (possibly different for each block 2542 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2543 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2544 submatrix (same for all local rows). 2545 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2546 off-diagonal portion of the local submatrix (possibly different for 2547 each block row) or PETSC_NULL. 2548 2549 Output Parameter: 2550 . A - the matrix 2551 2552 Options Database Keys: 2553 . -mat_no_unroll - uses code that does not unroll the loops in the 2554 block calculations (much slower) 2555 . -mat_block_size - size of the blocks to use 2556 2557 Notes: 2558 If the *_nnz parameter is given then the *_nz parameter is ignored 2559 2560 A nonzero block is any block that has 1 or more nonzeros in it 2561 2562 The user MUST specify either the local or global matrix dimensions 2563 (possibly both). 2564 2565 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2566 then it must be used on all processors that share the object for that argument. 2567 2568 Storage Information: 2569 For a square global matrix we define each processor's diagonal portion 2570 to be its local rows and the corresponding columns (a square submatrix); 2571 each processor's off-diagonal portion encompasses the remainder of the 2572 local matrix (a rectangular submatrix). 2573 2574 The user can specify preallocated storage for the diagonal part of 2575 the local submatrix with either d_nz or d_nnz (not both). Set 2576 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2577 memory allocation. Likewise, specify preallocated storage for the 2578 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2579 2580 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2581 the figure below we depict these three local rows and all columns (0-11). 2582 2583 .vb 2584 0 1 2 3 4 5 6 7 8 9 10 11 2585 ------------------- 2586 row 3 | o o o d d d o o o o o o 2587 row 4 | o o o d d d o o o o o o 2588 row 5 | o o o d d d o o o o o o 2589 ------------------- 2590 .ve 2591 2592 Thus, any entries in the d locations are stored in the d (diagonal) 2593 submatrix, and any entries in the o locations are stored in the 2594 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2595 stored simply in the MATSEQBAIJ format for compressed row storage. 2596 2597 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2598 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2599 In general, for PDE problems in which most nonzeros are near the diagonal, 2600 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2601 or you will get TERRIBLE performance; see the users' manual chapter on 2602 matrices.
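
   Example (a minimal usage sketch, not taken from the PETSc examples; it assumes a square global
   matrix with M rows, block size 2, and purely illustrative preallocation counts, and the names
   ib, jb and block_vals stand for a block row index, a block column index and a 2x2 array of values):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                           5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
   ierr = MatSetValuesBlocked(A,1,&ib,1,&jb,block_vals,INSERT_VALUES);CHKERRQ(ierr);
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve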
2603 2604 Level: intermediate 2605 2606 .keywords: matrix, block, aij, compressed row, sparse, parallel 2607 2608 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2609 @*/ 2610 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2611 { 2612 PetscErrorCode ierr; 2613 PetscMPIInt size; 2614 2615 PetscFunctionBegin; 2616 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2617 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2618 if (size > 1) { 2619 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2620 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2621 } else { 2622 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2623 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2624 } 2625 PetscFunctionReturn(0); 2626 } 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2630 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2631 { 2632 Mat mat; 2633 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2634 PetscErrorCode ierr; 2635 PetscInt len=0; 2636 2637 PetscFunctionBegin; 2638 *newmat = 0; 2639 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2640 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2641 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2642 2643 mat->factor = matin->factor; 2644 mat->preallocated = PETSC_TRUE; 2645 mat->assembled = PETSC_TRUE; 2646 mat->insertmode = NOT_SET_VALUES; 2647 2648 a = (Mat_MPIBAIJ*)mat->data; 2649 mat->bs = matin->bs; 2650 a->bs2 = oldmat->bs2; 2651 a->mbs = oldmat->mbs; 2652 a->nbs = oldmat->nbs; 2653 a->Mbs = oldmat->Mbs; 2654 a->Nbs = oldmat->Nbs; 2655 2656 a->rstart = oldmat->rstart; 2657 a->rend = oldmat->rend; 2658 a->cstart = oldmat->cstart; 2659 a->cend = oldmat->cend; 2660 a->size = oldmat->size; 2661 a->rank = oldmat->rank; 2662 a->donotstash = oldmat->donotstash; 2663 a->roworiented = oldmat->roworiented; 2664 a->rowindices = 0; 2665 a->rowvalues = 0; 2666 a->getrowactive = PETSC_FALSE; 2667 a->barray = 0; 2668 a->rstart_bs = oldmat->rstart_bs; 2669 a->rend_bs = oldmat->rend_bs; 2670 a->cstart_bs = oldmat->cstart_bs; 2671 a->cend_bs = oldmat->cend_bs; 2672 2673 /* hash table stuff */ 2674 a->ht = 0; 2675 a->hd = 0; 2676 a->ht_size = 0; 2677 a->ht_flag = oldmat->ht_flag; 2678 a->ht_fact = oldmat->ht_fact; 2679 a->ht_total_ct = 0; 2680 a->ht_insert_ct = 0; 2681 2682 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2683 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2684 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2685 if (oldmat->colmap) { 2686 #if defined (PETSC_USE_CTABLE) 2687 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2688 #else 2689 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2690 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2691 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2692 #endif 2693 } else a->colmap = 0; 2694 2695 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2696 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2697 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2698 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2699 } else a->garray = 0; 2700 2701 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2702 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2703 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2704 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2705 2706 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2708 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2709 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2710 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2711 *newmat = mat; 2712 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #include "petscsys.h" 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatLoad_MPIBAIJ" 2720 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2721 { 2722 Mat A; 2723 PetscErrorCode ierr; 2724 int fd; 2725 PetscInt i,nz,j,rstart,rend; 2726 PetscScalar *vals,*buf; 2727 MPI_Comm comm = ((PetscObject)viewer)->comm; 2728 MPI_Status status; 2729 PetscMPIInt rank,size,maxnz; 2730 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2731 PetscInt *locrowlens,*procsnz = 0,*browners; 2732 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2733 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2734 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2735 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2736 2737 PetscFunctionBegin; 2738 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2739 2740 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2741 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2742 if (!rank) { 2743 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2744 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2745 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2746 } 2747 2748 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2749 M = header[1]; N = header[2]; 2750 2751 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2752 2753 /* 2754 This code adds extra rows to make sure the number of rows is 2755 divisible by the blocksize 2756 */ 2757 Mbs = M/bs; 2758 extra_rows = bs - M + bs*Mbs; 2759 if (extra_rows == bs) extra_rows = 0; 2760 else Mbs++; 2761 if (extra_rows && !rank) { 2762 ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2763 } 2764 2765 /* determine ownership of all rows */ 2766 mbs = Mbs/size + ((Mbs % size) > rank); 2767 m = mbs*bs; 2768 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2769 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2770 2771 /* process 0 needs enough room for process with most rows */ 2772 if (!rank) { 2773 mmax = rowners[1]; 2774 for (i=2; i<size; i++) { 2775 mmax = PetscMax(mmax,rowners[i]); 2776 } 2777 mmax*=bs; 2778 } else mmax = m; 2779 2780 rowners[0] = 0; 2781 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2782 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2783 rstart = rowners[rank]; 2784 rend = rowners[rank+1]; 2785 2786 /* distribute row lengths to all processors */ 2787 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2788 if (!rank) { 2789 mend = m; 2790 if (size == 1) mend = mend - 
extra_rows; 2791 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2792 for (j=mend; j<m; j++) locrowlens[j] = 1; 2793 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2794 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2795 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2796 for (j=0; j<m; j++) { 2797 procsnz[0] += locrowlens[j]; 2798 } 2799 for (i=1; i<size; i++) { 2800 mend = browners[i+1] - browners[i]; 2801 if (i == size-1) mend = mend - extra_rows; 2802 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2803 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2804 /* calculate the number of nonzeros on each processor */ 2805 for (j=0; j<browners[i+1]-browners[i]; j++) { 2806 procsnz[i] += rowlengths[j]; 2807 } 2808 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2809 } 2810 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2811 } else { 2812 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2813 } 2814 2815 if (!rank) { 2816 /* determine max buffer needed and allocate it */ 2817 maxnz = procsnz[0]; 2818 for (i=1; i<size; i++) { 2819 maxnz = PetscMax(maxnz,procsnz[i]); 2820 } 2821 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2822 2823 /* read in my part of the matrix column indices */ 2824 nz = procsnz[0]; 2825 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2826 mycols = ibuf; 2827 if (size == 1) nz -= extra_rows; 2828 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2829 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2830 2831 /* read in every ones (except the last) and ship off */ 2832 for (i=1; i<size-1; i++) { 2833 nz = procsnz[i]; 2834 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2835 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2836 } 2837 /* read in the stuff for the last proc */ 2838 if (size != 1) { 2839 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2840 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2841 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2842 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2843 } 2844 ierr = PetscFree(cols);CHKERRQ(ierr); 2845 } else { 2846 /* determine buffer space needed for message */ 2847 nz = 0; 2848 for (i=0; i<m; i++) { 2849 nz += locrowlens[i]; 2850 } 2851 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2852 mycols = ibuf; 2853 /* receive message of column indices*/ 2854 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2855 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2856 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2857 } 2858 2859 /* loop over local rows, determining number of off diagonal entries */ 2860 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2861 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2862 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2863 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2864 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2865 rowcount = 0; nzcount = 0; 2866 for (i=0; i<mbs; i++) { 2867 dcount = 0; 2868 odcount = 0; 2869 for (j=0; j<bs; j++) { 2870 kmax = locrowlens[rowcount]; 2871 for (k=0; k<kmax; k++) { 2872 tmp = 
mycols[nzcount++]/bs; 2873 if (!mask[tmp]) { 2874 mask[tmp] = 1; 2875 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2876 else masked1[dcount++] = tmp; 2877 } 2878 } 2879 rowcount++; 2880 } 2881 2882 dlens[i] = dcount; 2883 odlens[i] = odcount; 2884 2885 /* zero out the mask elements we set */ 2886 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2887 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2888 } 2889 2890 /* create our matrix */ 2891 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2892 ierr = MatSetType(A,type);CHKERRQ(ierr) 2893 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2894 2895 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2896 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2897 2898 if (!rank) { 2899 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2900 /* read in my part of the matrix numerical values */ 2901 nz = procsnz[0]; 2902 vals = buf; 2903 mycols = ibuf; 2904 if (size == 1) nz -= extra_rows; 2905 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2906 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2907 2908 /* insert into matrix */ 2909 jj = rstart*bs; 2910 for (i=0; i<m; i++) { 2911 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2912 mycols += locrowlens[i]; 2913 vals += locrowlens[i]; 2914 jj++; 2915 } 2916 /* read in other processors (except the last one) and ship out */ 2917 for (i=1; i<size-1; i++) { 2918 nz = procsnz[i]; 2919 vals = buf; 2920 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2921 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2922 } 2923 /* the last proc */ 2924 if (size != 1){ 2925 nz = procsnz[i] - extra_rows; 2926 vals = buf; 2927 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2928 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2929 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2930 } 2931 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2932 } else { 2933 /* receive numeric values */ 2934 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2935 2936 /* receive message of values*/ 2937 vals = buf; 2938 mycols = ibuf; 2939 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2940 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2941 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2942 2943 /* insert into matrix */ 2944 jj = rstart*bs; 2945 for (i=0; i<m; i++) { 2946 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2947 mycols += locrowlens[i]; 2948 vals += locrowlens[i]; 2949 jj++; 2950 } 2951 } 2952 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2953 ierr = PetscFree(buf);CHKERRQ(ierr); 2954 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2955 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2956 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2957 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2958 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 2961 *newmat = A; 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #undef __FUNCT__ 2966 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2967 /*@ 2968 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2969 2970 Input Parameters: 2971 . mat - the matrix 2972 . 
fact - factor 2973 2974 Collective on Mat 2975 2976 Level: advanced 2977 2978 Notes: 2979 This can also be set by the command line option: -mat_use_hash_table fact 2980 2981 .keywords: matrix, hashtable, factor, HT 2982 2983 .seealso: MatSetOption() 2984 @*/ 2985 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2986 { 2987 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2988 2989 PetscFunctionBegin; 2990 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2991 if (f) { 2992 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2993 } 2994 PetscFunctionReturn(0); 2995 } 2996 2997 #undef __FUNCT__ 2998 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2999 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3000 { 3001 Mat_MPIBAIJ *baij; 3002 3003 PetscFunctionBegin; 3004 baij = (Mat_MPIBAIJ*)mat->data; 3005 baij->ht_fact = fact; 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3011 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3012 { 3013 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3014 PetscFunctionBegin; 3015 *Ad = a->A; 3016 *Ao = a->B; 3017 *colmap = a->garray; 3018 PetscFunctionReturn(0); 3019 } 3020
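
/*
   Usage sketch (illustrative only, not part of the library): how a caller might inspect the
   local pieces of an assembled MATMPIBAIJ matrix via MatMPIBAIJGetSeqBAIJ() above. Here A is
   assumed to be an assembled MATMPIBAIJ matrix and ierr a PetscErrorCode; the returned objects
   are the matrix's internal data, not copies, so the caller must not destroy them.

      Mat      Ad,Ao;
      PetscInt *garray;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
      ierr = MatView(Ad,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);

   Ad is the diagonal block (the block rows and columns owned by this process), Ao holds the
   remaining local entries with compacted column numbering, and garray[j] gives the global block
   column corresponding to local block column j of Ao.
*/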