
#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt  new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}
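
/*
   MatSetValues_SeqBAIJ_B_Private() below is the same insertion logic as the
   macro above, applied to the off-diagonal block b = (Mat_SeqBAIJ*)baij->B->data
   (arrays bi, bj, ba, bimax, bilen) instead of the diagonal block a.
*/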

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}
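
/*
   How the two insertion macros work: the target block row is brow = row/bs
   and the target block column is bcol = col/bs.  The existing block columns
   of the row (rp[0..nrow-1], kept sorted) are searched: the while loop halves
   the window until it is at most 3 wide, then a short linear scan finishes
   the search.  If the block already exists, the single scalar at offset
   bs*cidx + ridx inside the bs*bs block is added to or overwritten.
   Otherwise a new block is inserted in place, shifting the later blocks of
   the row up; if the row has no free slot (nrow >= rmax) the entire local
   matrix storage is first reallocated with room for CHUNKSIZE extra blocks
   and the row pointers are rebuilt.  Both macros are expanded inside
   MatSetValues_MPIBAIJ_MatScalar() below and rely on the local variables
   (rp, ap, nrow, low, high, _i, ...) declared there.
*/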

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
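
/*
   MatSetValues_MPIBAIJ_MatScalar() below receives global (scalar) row and
   column indices.  Rows owned by this process (rstart_bs <= row < rend_bs)
   are inserted directly: columns inside the owned column range go into the
   diagonal block baij->A, all other columns go into the off-diagonal block
   baij->B (via colmap once the matrix has been assembled before).  Rows owned
   by other processes are copied into mat->stash and communicated during
   MatAssemblyBegin()/MatAssemblyEnd().  MatSetValuesBlocked_MPIBAIJ_MatScalar()
   that follows does the same with block indices; when a bs*bs block of v is
   not already contiguous it is first gathered into the scratch array
   baij->barray.

   A small usage sketch from the caller's side (illustrative only, not part of
   this file); any process may set any entry and assembly routes it to the
   owner:

     PetscScalar one = 1.0;
     PetscInt    row = 5, col = 12;                      /* global indices */
     ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/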

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
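
/*
   The hash-table insertion path (enabled with MAT_USE_HASH_TABLE) maps each
   existing nonzero block to its storage address once, in
   MatCreateHashTable_MPIBAIJ_Private(), so that MatSetValues only needs a
   table lookup instead of a search of the row.  The key for block (brow,bcol)
   is brow*Nbs + bcol + 1 (the +1 keeps 0 free to mean "empty slot") and the
   multiplicative hash above uses the fractional part of key*0.6180339887
   (the inverse golden ratio) scaled to the table size.  Worked example: with
   size = 10 and key = 7, 7*0.6180339887 = 4.3262..., fractional part
   0.3262..., so HASH(10,7,tmp) = 3.  Collisions are resolved by linear
   probing that wraps around the end of the table; new nonzero locations
   cannot be created on this path.
*/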

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     
roworiented = baij->roworiented; 654 PetscErrorCode ierr; 655 PetscInt i,j,ii,jj,row,col; 656 PetscInt rstart=baij->rstart ; 657 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 658 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659 PetscReal tmp; 660 MatScalar **HD = baij->hd,*baij_a; 661 const MatScalar *v_t,*value; 662 #if defined(PETSC_USE_DEBUG) 663 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 664 #endif 665 666 PetscFunctionBegin; 667 668 if (roworiented) { 669 stepval = (n-1)*bs; 670 } else { 671 stepval = (m-1)*bs; 672 } 673 for (i=0; i<m; i++) { 674 #if defined(PETSC_USE_DEBUG) 675 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 676 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 677 #endif 678 row = im[i]; 679 v_t = v + i*bs2; 680 if (row >= rstart && row < rend) { 681 for (j=0; j<n; j++) { 682 col = in[j]; 683 684 /* Look up into the Hash Table */ 685 key = row*Nbs+col+1; 686 h1 = HASH(size,key,tmp); 687 688 idx = h1; 689 #if defined(PETSC_USE_DEBUG) 690 total_ct++; 691 insert_ct++; 692 if (HT[idx] != key) { 693 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694 if (idx == size) { 695 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696 if (idx == h1) { 697 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 698 } 699 } 700 } 701 #else 702 if (HT[idx] != key) { 703 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704 if (idx == size) { 705 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706 if (idx == h1) { 707 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 708 } 709 } 710 } 711 #endif 712 baij_a = HD[idx]; 713 if (roworiented) { 714 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715 /* value = v + (i*(stepval+bs)+j)*bs; */ 716 value = v_t; 717 v_t += bs; 718 if (addv == ADD_VALUES) { 719 for (ii=0; ii<bs; ii++,value+=stepval) { 720 for (jj=ii; jj<bs2; jj+=bs) { 721 baij_a[jj] += *value++; 722 } 723 } 724 } else { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 baij_a[jj] = *value++; 728 } 729 } 730 } 731 } else { 732 value = v + j*(stepval+bs)*bs + i*bs; 733 if (addv == ADD_VALUES) { 734 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 735 for (jj=0; jj<bs; jj++) { 736 baij_a[jj] += *value++; 737 } 738 } 739 } else { 740 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741 for (jj=0; jj<bs; jj++) { 742 baij_a[jj] = *value++; 743 } 744 } 745 } 746 } 747 } 748 } else { 749 if (!baij->donotstash) { 750 if (roworiented) { 751 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 752 } else { 753 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 754 } 755 } 756 } 757 } 758 #if defined(PETSC_USE_DEBUG) 759 baij->ht_total_ct = total_ct; 760 baij->ht_insert_ct = insert_ct; 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetValues_MPIBAIJ" 767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 770 PetscErrorCode ierr; 771 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 772 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773 774 PetscFunctionBegin; 775 for (i=0; i<m; i++) { 776 if (idxm[i] < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 777 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779 row = idxm[i] - bsrstart; 780 for (j=0; j<n; j++) { 781 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 782 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783 if (idxn[j] >= bscstart && idxn[j] < bscend){ 784 col = idxn[j] - bscstart; 785 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } else { 787 if (!baij->colmap) { 788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789 } 790 #if defined (PETSC_USE_CTABLE) 791 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792 data --; 793 #else 794 data = baij->colmap[idxn[j]/bs]-1; 795 #endif 796 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797 else { 798 col = data + idxn[j]%bs; 799 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800 } 801 } 802 } 803 } else { 804 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatNorm_MPIBAIJ" 812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813 { 814 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816 PetscErrorCode ierr; 817 PetscInt i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col; 818 PetscReal sum = 0.0; 819 MatScalar *v; 820 821 PetscFunctionBegin; 822 if (baij->size == 1) { 823 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 v = amat->a; 827 nz = amat->nz*bs2; 828 for (i=0; i<nz; i++) { 829 #if defined(PETSC_USE_COMPLEX) 830 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 831 #else 832 sum += (*v)*(*v); v++; 833 #endif 834 } 835 v = bmat->a; 836 nz = bmat->nz*bs2; 837 for (i=0; i<nz; i++) { 838 #if defined(PETSC_USE_COMPLEX) 839 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 840 #else 841 sum += (*v)*(*v); v++; 842 #endif 843 } 844 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 845 *nrm = sqrt(*nrm); 846 } else if (type == NORM_1) { /* max column sum */ 847 PetscReal *tmp,*tmp2; 848 PetscInt *jj,*garray=baij->garray,cstart=baij->cstart; 849 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 850 tmp2 = tmp + mat->N; 851 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 852 v = amat->a; jj = amat->j; 853 for (i=0; i<amat->nz; i++) { 854 for (j=0; j<bs; j++){ 855 col = bs*(cstart + *jj) + j; /* column index */ 856 for (row=0; row<bs; row++){ 857 tmp[col] += PetscAbsScalar(*v); v++; 858 } 859 } 860 jj++; 861 } 862 v = bmat->a; jj = bmat->j; 863 for (i=0; i<bmat->nz; i++) { 864 for (j=0; j<bs; j++){ 865 col = bs*garray[*jj] + j; 866 for (row=0; row<bs; row++){ 867 tmp[col] += PetscAbsScalar(*v); v++; 868 } 869 } 870 jj++; 871 } 872 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 873 *nrm = 0.0; 874 for (j=0; j<mat->N; j++) { 875 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 876 } 877 ierr = PetscFree(tmp);CHKERRQ(ierr); 878 } else if (type == NORM_INFINITY) { /* max row sum */ 879 PetscReal *sums; 880 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 881 sum = 0.0; 882 for (j=0; j<amat->mbs; j++) { 883 for (row=0; row<bs; 
row++) sums[row] = 0.0; 884 v = amat->a + bs2*amat->i[j]; 885 nz = amat->i[j+1]-amat->i[j]; 886 for (i=0; i<nz; i++) { 887 for (col=0; col<bs; col++){ 888 for (row=0; row<bs; row++){ 889 sums[row] += PetscAbsScalar(*v); v++; 890 } 891 } 892 } 893 v = bmat->a + bs2*bmat->i[j]; 894 nz = bmat->i[j+1]-bmat->i[j]; 895 for (i=0; i<nz; i++) { 896 for (col=0; col<bs; col++){ 897 for (row=0; row<bs; row++){ 898 sums[row] += PetscAbsScalar(*v); v++; 899 } 900 } 901 } 902 for (row=0; row<bs; row++){ 903 if (sums[row] > sum) sum = sums[row]; 904 } 905 } 906 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 907 ierr = PetscFree(sums);CHKERRQ(ierr); 908 } else { 909 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 910 } 911 } 912 PetscFunctionReturn(0); 913 } 914 915 /* 916 Creates the hash table, and sets the table 917 This table is created only once. 918 If new entried need to be added to the matrix 919 then the hash table has to be destroyed and 920 recreated. 921 */ 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 924 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 925 { 926 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 927 Mat A = baij->A,B=baij->B; 928 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 929 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 930 PetscErrorCode ierr; 931 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 932 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 933 PetscInt *HT,key; 934 MatScalar **HD; 935 PetscReal tmp; 936 #if defined(PETSC_USE_DEBUG) 937 PetscInt ct=0,max=0; 938 #endif 939 940 PetscFunctionBegin; 941 baij->ht_size=(PetscInt)(factor*nz); 942 size = baij->ht_size; 943 944 if (baij->ht) { 945 PetscFunctionReturn(0); 946 } 947 948 /* Allocate Memory for Hash Table */ 949 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 950 baij->ht = (PetscInt*)(baij->hd + size); 951 HD = baij->hd; 952 HT = baij->ht; 953 954 955 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 956 957 958 /* Loop Over A */ 959 for (i=0; i<a->mbs; i++) { 960 for (j=ai[i]; j<ai[i+1]; j++) { 961 row = i+rstart; 962 col = aj[j]+cstart; 963 964 key = row*Nbs + col + 1; 965 h1 = HASH(size,key,tmp); 966 for (k=0; k<size; k++){ 967 if (!HT[(h1+k)%size]) { 968 HT[(h1+k)%size] = key; 969 HD[(h1+k)%size] = a->a + j*bs2; 970 break; 971 #if defined(PETSC_USE_DEBUG) 972 } else { 973 ct++; 974 #endif 975 } 976 } 977 #if defined(PETSC_USE_DEBUG) 978 if (k> max) max = k; 979 #endif 980 } 981 } 982 /* Loop Over B */ 983 for (i=0; i<b->mbs; i++) { 984 for (j=bi[i]; j<bi[i+1]; j++) { 985 row = i+rstart; 986 col = garray[bj[j]]; 987 key = row*Nbs + col + 1; 988 h1 = HASH(size,key,tmp); 989 for (k=0; k<size; k++){ 990 if (!HT[(h1+k)%size]) { 991 HT[(h1+k)%size] = key; 992 HD[(h1+k)%size] = b->a + j*bs2; 993 break; 994 #if defined(PETSC_USE_DEBUG) 995 } else { 996 ct++; 997 #endif 998 } 999 } 1000 #if defined(PETSC_USE_DEBUG) 1001 if (k> max) max = k; 1002 #endif 1003 } 1004 } 1005 1006 /* Print Summary */ 1007 #if defined(PETSC_USE_DEBUG) 1008 for (i=0,j=0; i<size; i++) { 1009 if (HT[i]) {j++;} 1010 } 1011 ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr); 1012 #endif 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 1018 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 1019 { 1020 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1021 PetscErrorCode ierr; 1022 PetscInt nstash,reallocs; 1023 InsertMode addv; 1024 1025 PetscFunctionBegin; 1026 if (baij->donotstash) { 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* make sure all processors are either in INSERTMODE or ADDMODE */ 1031 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 1032 if (addv == (ADD_VALUES|INSERT_VALUES)) { 1033 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1034 } 1035 mat->insertmode = addv; /* in case this processor had no cache */ 1036 1037 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 1038 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 1039 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 1040 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1041 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 1042 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 1048 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 1049 { 1050 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 1051 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 1052 PetscErrorCode ierr; 1053 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 1054 PetscInt *row,*col,other_disassembled; 1055 PetscTruth r1,r2,r3; 1056 MatScalar *val; 1057 InsertMode addv = mat->insertmode; 1058 PetscMPIInt n; 1059 1060 PetscFunctionBegin; 1061 if (!baij->donotstash) { 1062 while (1) { 1063 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1064 if (!flg) break; 1065 1066 for (i=0; i<n;) { 1067 /* Now identify the consecutive vals belonging to the same row */ 1068 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1069 if (j < n) ncols = j-i; 1070 else ncols = n-i; 1071 /* Now assemble all these values with a single function call */ 1072 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1073 i = j; 1074 } 1075 } 1076 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1077 /* Now process the block-stash. 
Since the values are stashed column-oriented, 1078 set the roworiented flag to column oriented, and after MatSetValues() 1079 restore the original flags */ 1080 r1 = baij->roworiented; 1081 r2 = a->roworiented; 1082 r3 = b->roworiented; 1083 baij->roworiented = PETSC_FALSE; 1084 a->roworiented = PETSC_FALSE; 1085 b->roworiented = PETSC_FALSE; 1086 while (1) { 1087 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1088 if (!flg) break; 1089 1090 for (i=0; i<n;) { 1091 /* Now identify the consecutive vals belonging to the same row */ 1092 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1093 if (j < n) ncols = j-i; 1094 else ncols = n-i; 1095 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1096 i = j; 1097 } 1098 } 1099 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1100 baij->roworiented = r1; 1101 a->roworiented = r2; 1102 b->roworiented = r3; 1103 } 1104 1105 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1106 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1107 1108 /* determine if any processor has disassembled, if so we must 1109 also disassemble ourselfs, in order that we may reassemble. */ 1110 /* 1111 if nonzero structure of submatrix B cannot change then we know that 1112 no processor disassembled thus we can skip this stuff 1113 */ 1114 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1115 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1116 if (mat->was_assembled && !other_disassembled) { 1117 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1118 } 1119 } 1120 1121 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1122 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1123 } 1124 b->compressedrow.use = PETSC_TRUE; 1125 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1126 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1127 1128 #if defined(PETSC_USE_DEBUG) 1129 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1130 ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr); 1131 baij->ht_total_ct = 0; 1132 baij->ht_insert_ct = 0; 1133 } 1134 #endif 1135 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1136 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1137 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1138 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1139 } 1140 1141 if (baij->rowvalues) { 1142 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1143 baij->rowvalues = 0; 1144 } 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1150 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1151 { 1152 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1153 PetscErrorCode ierr; 1154 PetscMPIInt size = baij->size,rank = baij->rank; 1155 PetscInt bs = mat->bs; 1156 PetscTruth iascii,isdraw; 1157 PetscViewer sviewer; 1158 PetscViewerFormat format; 1159 1160 PetscFunctionBegin; 1161 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1162 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1163 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1164 if (iascii) { 1165 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1166 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 1167 MatInfo info; 1168 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1169 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1170 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1171 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1172 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1173 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1174 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1175 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1176 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1177 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1178 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1181 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1184 PetscFunctionReturn(0); 1185 } 1186 } 1187 1188 if (isdraw) { 1189 PetscDraw draw; 1190 PetscTruth isnull; 1191 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1192 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1193 } 1194 1195 if (size == 1) { 1196 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1197 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1198 } else { 1199 /* assemble the entire matrix onto first processor. */ 1200 Mat A; 1201 Mat_SeqBAIJ *Aloc; 1202 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1203 MatScalar *a; 1204 1205 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1206 /* Perhaps this should be the type of mat? 
*/ 1207 if (!rank) { 1208 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1209 } else { 1210 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1211 } 1212 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1213 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1214 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1215 1216 /* copy over the A part */ 1217 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1218 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1219 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1220 1221 for (i=0; i<mbs; i++) { 1222 rvals[0] = bs*(baij->rstart + i); 1223 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1224 for (j=ai[i]; j<ai[i+1]; j++) { 1225 col = (baij->cstart+aj[j])*bs; 1226 for (k=0; k<bs; k++) { 1227 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1228 col++; a += bs; 1229 } 1230 } 1231 } 1232 /* copy over the B part */ 1233 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1234 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1235 for (i=0; i<mbs; i++) { 1236 rvals[0] = bs*(baij->rstart + i); 1237 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1238 for (j=ai[i]; j<ai[i+1]; j++) { 1239 col = baij->garray[aj[j]]*bs; 1240 for (k=0; k<bs; k++) { 1241 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1242 col++; a += bs; 1243 } 1244 } 1245 } 1246 ierr = PetscFree(rvals);CHKERRQ(ierr); 1247 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1248 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1249 /* 1250 Everyone has to call to draw the matrix since the graphics waits are 1251 synchronized across all processors that share the PetscDraw object 1252 */ 1253 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1254 if (!rank) { 1255 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1256 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1257 } 1258 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1259 ierr = MatDestroy(A);CHKERRQ(ierr); 1260 } 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatView_MPIBAIJ" 1266 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1267 { 1268 PetscErrorCode ierr; 1269 PetscTruth iascii,isdraw,issocket,isbinary; 1270 1271 PetscFunctionBegin; 1272 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1273 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1274 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1275 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1276 if (iascii || isdraw || issocket || isbinary) { 1277 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1278 } else { 1279 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1286 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1287 { 1288 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1289 PetscErrorCode ierr; 1290 1291 PetscFunctionBegin; 1292 #if defined(PETSC_USE_LOG) 1293 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1294 #endif 1295 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1296 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1297 
ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1298 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1299 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1300 #if defined (PETSC_USE_CTABLE) 1301 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1302 #else 1303 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1304 #endif 1305 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1306 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1307 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1308 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1309 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1310 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1311 #if defined(PETSC_USE_MAT_SINGLE) 1312 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1313 #endif 1314 ierr = PetscFree(baij);CHKERRQ(ierr); 1315 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1319 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1320 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1321 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1322 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatMult_MPIBAIJ" 1328 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1329 { 1330 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 PetscInt nt; 1333 1334 PetscFunctionBegin; 1335 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1336 if (nt != A->n) { 1337 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1338 } 1339 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1340 if (nt != A->m) { 1341 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1342 } 1343 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1344 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1345 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1346 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1347 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1353 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1354 { 1355 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1356 PetscErrorCode ierr; 1357 1358 PetscFunctionBegin; 1359 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1360 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1361 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1362 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1368 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1369 { 1370 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 1371 PetscErrorCode ierr; 1372 PetscTruth merged; 1373 1374 PetscFunctionBegin; 1375 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1376 /* do nondiagonal part */ 1377 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1378 if (!merged) { 1379 /* send it on its way */ 1380 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1381 /* do local part */ 1382 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1383 /* receive remote parts: note this assumes the values are not actually */ 1384 /* inserted in yy until the next line */ 1385 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1386 } else { 1387 /* do local part */ 1388 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1389 /* send it on its way */ 1390 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1391 /* values actually were received in the Begin() but we need to call this nop */ 1392 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1399 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1400 { 1401 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1402 PetscErrorCode ierr; 1403 1404 PetscFunctionBegin; 1405 /* do nondiagonal part */ 1406 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1407 /* send it on its way */ 1408 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1409 /* do local part */ 1410 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1411 /* receive remote parts: note this assumes the values are not actually */ 1412 /* inserted in yy until the next line, which is true for my implementation*/ 1413 /* but is not perhaps always true. 
*/ 1414 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /* 1419 This only works correctly for square matrices where the subblock A->A is the 1420 diagonal block 1421 */ 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1424 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1425 { 1426 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1427 PetscErrorCode ierr; 1428 1429 PetscFunctionBegin; 1430 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1431 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatScale_MPIBAIJ" 1437 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1438 { 1439 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1444 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1450 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1451 { 1452 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1453 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1454 PetscErrorCode ierr; 1455 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1456 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1457 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1458 1459 PetscFunctionBegin; 1460 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1461 mat->getrowactive = PETSC_TRUE; 1462 1463 if (!mat->rowvalues && (idx || v)) { 1464 /* 1465 allocate enough space to hold information from the longest row. 
1466 */ 1467 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1468 PetscInt max = 1,mbs = mat->mbs,tmp; 1469 for (i=0; i<mbs; i++) { 1470 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1471 if (max < tmp) { max = tmp; } 1472 } 1473 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1474 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1475 } 1476 1477 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1478 lrow = row - brstart; 1479 1480 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1481 if (!v) {pvA = 0; pvB = 0;} 1482 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1483 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1484 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1485 nztot = nzA + nzB; 1486 1487 cmap = mat->garray; 1488 if (v || idx) { 1489 if (nztot) { 1490 /* Sort by increasing column numbers, assuming A and B already sorted */ 1491 PetscInt imark = -1; 1492 if (v) { 1493 *v = v_p = mat->rowvalues; 1494 for (i=0; i<nzB; i++) { 1495 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1496 else break; 1497 } 1498 imark = i; 1499 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1500 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1501 } 1502 if (idx) { 1503 *idx = idx_p = mat->rowindices; 1504 if (imark > -1) { 1505 for (i=0; i<imark; i++) { 1506 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1507 } 1508 } else { 1509 for (i=0; i<nzB; i++) { 1510 if (cmap[cworkB[i]/bs] < cstart) 1511 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1512 else break; 1513 } 1514 imark = i; 1515 } 1516 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1517 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1518 } 1519 } else { 1520 if (idx) *idx = 0; 1521 if (v) *v = 0; 1522 } 1523 } 1524 *nz = nztot; 1525 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1526 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1532 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1533 { 1534 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1535 1536 PetscFunctionBegin; 1537 if (!baij->getrowactive) { 1538 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1539 } 1540 baij->getrowactive = PETSC_FALSE; 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1546 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1547 { 1548 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1549 PetscErrorCode ierr; 1550 1551 PetscFunctionBegin; 1552 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1553 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1559 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1560 { 1561 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1562 Mat A = a->A,B = a->B; 1563 PetscErrorCode ierr; 1564 PetscReal isend[5],irecv[5]; 1565 1566 PetscFunctionBegin; 1567 info->block_size = (PetscReal)matin->bs; 1568 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1569 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1570 isend[3] = info->memory; isend[4] = info->mallocs; 1571 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt
M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1675 PetscInt bs=A->bs,mbs=baij->mbs; 1676 MatScalar *a; 1677 1678 PetscFunctionBegin; 1679 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1680 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1681 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1682 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1683 1684 /* copy over the A part */ 1685 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1686 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1687 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1688 1689 for (i=0; i<mbs; i++) { 1690 rvals[0] = bs*(baij->rstart + i); 1691 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1692 for (j=ai[i]; j<ai[i+1]; j++) { 1693 col = (baij->cstart+aj[j])*bs; 1694 for (k=0; k<bs; k++) { 1695 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1696 col++; a += bs; 1697 } 1698 } 1699 } 1700 /* copy over the B part */ 1701 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1702 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1703 for (i=0; i<mbs; i++) { 1704 rvals[0] = bs*(baij->rstart + i); 1705 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1706 for (j=ai[i]; j<ai[i+1]; j++) { 1707 col = baij->garray[aj[j]]*bs; 1708 for (k=0; k<bs; k++) { 1709 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1710 col++; a += bs; 1711 } 1712 } 1713 } 1714 ierr = PetscFree(rvals);CHKERRQ(ierr); 1715 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1716 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1717 1718 if (matout) { 1719 *matout = B; 1720 } else { 1721 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1722 } 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1728 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1729 { 1730 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1731 Mat a = baij->A,b = baij->B; 1732 PetscErrorCode ierr; 1733 PetscInt s1,s2,s3; 1734 1735 PetscFunctionBegin; 1736 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1737 if (rr) { 1738 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1739 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1740 /* Overlap communication with computation. 
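       The forward scatter of rr into baij->lvec is started here and completed
       below, after the diagonal block has been scaled, so the off-diagonal
       block can then be right-scaled by the gathered values.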
*/ 1741 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1742 } 1743 if (ll) { 1744 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1745 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1746 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1747 } 1748 /* scale the diagonal block */ 1749 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1750 1751 if (rr) { 1752 /* Do a scatter end and then right scale the off-diagonal block */ 1753 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1754 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1755 } 1756 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1762 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1763 { 1764 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1765 PetscErrorCode ierr; 1766 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1767 PetscInt i,N,*rows,*owners = l->rowners; 1768 PetscInt *nprocs,j,idx,nsends,row; 1769 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1770 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1771 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1772 MPI_Comm comm = A->comm; 1773 MPI_Request *send_waits,*recv_waits; 1774 MPI_Status recv_status,*send_status; 1775 IS istmp; 1776 #if defined(PETSC_DEBUG) 1777 PetscTruth found = PETSC_FALSE; 1778 #endif 1779 1780 PetscFunctionBegin; 1781 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1782 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1783 1784 /* first count number of contributors to each processor */ 1785 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1786 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1787 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1788 j = 0; 1789 for (i=0; i<N; i++) { 1790 if (lastidx > (idx = rows[i])) j = 0; 1791 lastidx = idx; 1792 for (; j<size; j++) { 1793 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1794 nprocs[2*j]++; 1795 nprocs[2*j+1] = 1; 1796 owner[i] = j; 1797 #if defined(PETSC_DEBUG) 1798 found = PETSC_TRUE; 1799 #endif 1800 break; 1801 } 1802 } 1803 #if defined(PETSC_DEBUG) 1804 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1805 found = PETSC_FALSE; 1806 #endif 1807 } 1808 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1809 1810 /* inform other processors of number of messages and max length*/ 1811 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1812 1813 /* post receives: */ 1814 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1815 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1816 for (i=0; i<nrecvs; i++) { 1817 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1818 } 1819 1820 /* do sends: 1821 1) starts[i] gives the starting index in svalues for stuff going to 1822 the ith processor 1823 */ 1824 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1825 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1826 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1827 starts[0] = 0; 1828 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1829 for (i=0; i<N; i++) { 1830 svalues[starts[owner[i]]++] = rows[i]; 1831 } 1832 ierr = 
ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1833 1834 starts[0] = 0; 1835 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1836 count = 0; 1837 for (i=0; i<size; i++) { 1838 if (nprocs[2*i+1]) { 1839 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1840 } 1841 } 1842 ierr = PetscFree(starts);CHKERRQ(ierr); 1843 1844 base = owners[rank]*bs; 1845 1846 /* wait on receives */ 1847 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1848 source = lens + nrecvs; 1849 count = nrecvs; slen = 0; 1850 while (count) { 1851 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1852 /* unpack receives into our local space */ 1853 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1854 source[imdex] = recv_status.MPI_SOURCE; 1855 lens[imdex] = n; 1856 slen += n; 1857 count--; 1858 } 1859 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1860 1861 /* move the data into the send scatter */ 1862 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1863 count = 0; 1864 for (i=0; i<nrecvs; i++) { 1865 values = rvalues + i*nmax; 1866 for (j=0; j<lens[i]; j++) { 1867 lrows[count++] = values[j] - base; 1868 } 1869 } 1870 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1871 ierr = PetscFree(lens);CHKERRQ(ierr); 1872 ierr = PetscFree(owner);CHKERRQ(ierr); 1873 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1874 1875 /* actually zap the local rows */ 1876 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1877 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 1878 1879 /* 1880 Zero the required rows. If the "diagonal block" of the matrix 1881 is square and the user wishes to set the diagonal we use seperate 1882 code so that MatSetValues() is not called for each diagonal allocating 1883 new memory, thus calling lots of mallocs and slowing things down. 
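       A typical caller-side use of this routine (a sketch only; nlocal and
       globalrows are placeholder names for whatever rows the caller wants
       zeroed) is

          PetscScalar one = 1.0;
          ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,globalrows,&is);CHKERRQ(ierr);
          ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
          ierr = ISDestroy(is);CHKERRQ(ierr);

       passing PETSC_NULL instead of &one when no diagonal entry should be set.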
1884 1885 Contributed by: Mathew Knepley 1886 */ 1887 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1888 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1889 if (diag && (l->A->M == l->A->N)) { 1890 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1891 } else if (diag) { 1892 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1893 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1894 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1895 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1896 } 1897 for (i=0; i<slen; i++) { 1898 row = lrows[i] + rstart_bs; 1899 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1900 } 1901 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1902 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1903 } else { 1904 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1905 } 1906 1907 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1908 ierr = PetscFree(lrows);CHKERRQ(ierr); 1909 1910 /* wait on sends */ 1911 if (nsends) { 1912 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1913 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1914 ierr = PetscFree(send_status);CHKERRQ(ierr); 1915 } 1916 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1917 ierr = PetscFree(svalues);CHKERRQ(ierr); 1918 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1924 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1925 { 1926 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1927 MPI_Comm comm = A->comm; 1928 static PetscTruth called = PETSC_FALSE; 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 if (!a->rank) { 1933 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1934 } 1935 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1936 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1937 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1943 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1944 { 1945 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1950 PetscFunctionReturn(0); 1951 } 1952 1953 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1954 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatEqual_MPIBAIJ" 1957 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1958 { 1959 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1960 Mat a,b,c,d; 1961 PetscTruth flg; 1962 PetscErrorCode ierr; 1963 1964 PetscFunctionBegin; 1965 a = matA->A; b = matA->B; 1966 c = matB->A; d = matB->B; 1967 1968 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1969 if (flg) { 1970 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1971 } 1972 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1979 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1980 { 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1985 
PetscFunctionReturn(0); 1986 } 1987 1988 /* -------------------------------------------------------------------*/ 1989 static struct _MatOps MatOps_Values = { 1990 MatSetValues_MPIBAIJ, 1991 MatGetRow_MPIBAIJ, 1992 MatRestoreRow_MPIBAIJ, 1993 MatMult_MPIBAIJ, 1994 /* 4*/ MatMultAdd_MPIBAIJ, 1995 MatMultTranspose_MPIBAIJ, 1996 MatMultTransposeAdd_MPIBAIJ, 1997 0, 1998 0, 1999 0, 2000 /*10*/ 0, 2001 0, 2002 0, 2003 0, 2004 MatTranspose_MPIBAIJ, 2005 /*15*/ MatGetInfo_MPIBAIJ, 2006 MatEqual_MPIBAIJ, 2007 MatGetDiagonal_MPIBAIJ, 2008 MatDiagonalScale_MPIBAIJ, 2009 MatNorm_MPIBAIJ, 2010 /*20*/ MatAssemblyBegin_MPIBAIJ, 2011 MatAssemblyEnd_MPIBAIJ, 2012 0, 2013 MatSetOption_MPIBAIJ, 2014 MatZeroEntries_MPIBAIJ, 2015 /*25*/ MatZeroRows_MPIBAIJ, 2016 0, 2017 0, 2018 0, 2019 0, 2020 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2021 0, 2022 0, 2023 0, 2024 0, 2025 /*35*/ MatDuplicate_MPIBAIJ, 2026 0, 2027 0, 2028 0, 2029 0, 2030 /*40*/ 0, 2031 MatGetSubMatrices_MPIBAIJ, 2032 MatIncreaseOverlap_MPIBAIJ, 2033 MatGetValues_MPIBAIJ, 2034 0, 2035 /*45*/ MatPrintHelp_MPIBAIJ, 2036 MatScale_MPIBAIJ, 2037 0, 2038 0, 2039 0, 2040 /*50*/ 0, 2041 0, 2042 0, 2043 0, 2044 0, 2045 /*55*/ 0, 2046 0, 2047 MatSetUnfactored_MPIBAIJ, 2048 0, 2049 MatSetValuesBlocked_MPIBAIJ, 2050 /*60*/ 0, 2051 MatDestroy_MPIBAIJ, 2052 MatView_MPIBAIJ, 2053 MatGetPetscMaps_Petsc, 2054 0, 2055 /*65*/ 0, 2056 0, 2057 0, 2058 0, 2059 0, 2060 /*70*/ MatGetRowMax_MPIBAIJ, 2061 0, 2062 0, 2063 0, 2064 0, 2065 /*75*/ 0, 2066 0, 2067 0, 2068 0, 2069 0, 2070 /*80*/ 0, 2071 0, 2072 0, 2073 0, 2074 MatLoad_MPIBAIJ, 2075 /*85*/ 0, 2076 0, 2077 0, 2078 0, 2079 0, 2080 /*90*/ 0, 2081 0, 2082 0, 2083 0, 2084 0, 2085 /*95*/ 0, 2086 0, 2087 0, 2088 0}; 2089 2090 2091 EXTERN_C_BEGIN 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2094 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2095 { 2096 PetscFunctionBegin; 2097 *a = ((Mat_MPIBAIJ *)A->data)->A; 2098 *iscopy = PETSC_FALSE; 2099 PetscFunctionReturn(0); 2100 } 2101 EXTERN_C_END 2102 2103 EXTERN_C_BEGIN 2104 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 2105 EXTERN_C_END 2106 2107 #undef __FUNCT__ 2108 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2109 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2110 { 2111 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2112 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2113 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2114 const PetscInt *JJ; 2115 PetscScalar *values; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 #if defined(PETSC_OPT_g) 2120 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2121 #endif 2122 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2123 o_nnz = d_nnz + m; 2124 2125 for (i=0; i<m; i++) { 2126 nnz = I[i+1]- I[i]; 2127 JJ = J + I[i]; 2128 nnz_max = PetscMax(nnz_max,nnz); 2129 #if defined(PETSC_OPT_g) 2130 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2131 #endif 2132 for (j=0; j<nnz; j++) { 2133 if (*JJ >= cstart) break; 2134 JJ++; 2135 } 2136 d = 0; 2137 for (; j<nnz; j++) { 2138 if (*JJ++ >= cend) break; 2139 d++; 2140 } 2141 d_nnz[i] = d; 2142 o_nnz[i] = nnz - d; 2143 } 2144 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2145 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2146 2147 if (v) values = 
(PetscScalar*)v; 2148 else { 2149 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2150 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2151 } 2152 2153 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2154 for (i=0; i<m; i++) { 2155 ii = i + rstart; 2156 nnz = I[i+1]- I[i]; 2157 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2158 } 2159 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2161 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2162 2163 if (!v) { 2164 ierr = PetscFree(values);CHKERRQ(ierr); 2165 } 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2171 /*@C 2172 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2173 (the default parallel PETSc format). 2174 2175 Collective on MPI_Comm 2176 2177 Input Parameters: 2178 + A - the matrix 2179 . i - the indices into j for the start of each local row (starts with zero) 2180 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2181 - v - optional values in the matrix 2182 2183 Level: developer 2184 2185 .keywords: matrix, aij, compressed row, sparse, parallel 2186 2187 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2188 @*/ 2189 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2190 { 2191 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2192 2193 PetscFunctionBegin; 2194 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2195 if (f) { 2196 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 EXTERN_C_BEGIN 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2204 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2205 { 2206 Mat_MPIBAIJ *b; 2207 PetscErrorCode ierr; 2208 PetscInt i; 2209 2210 PetscFunctionBegin; 2211 B->preallocated = PETSC_TRUE; 2212 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2213 2214 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2215 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2216 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2217 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2218 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2219 if (d_nnz) { 2220 for (i=0; i<B->m/bs; i++) { 2221 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2222 } 2223 } 2224 if (o_nnz) { 2225 for (i=0; i<B->m/bs; i++) { 2226 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2227 } 2228 } 2229 2230 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2231 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2232 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2233 ierr = 
PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2234 2235 b = (Mat_MPIBAIJ*)B->data; 2236 B->bs = bs; 2237 b->bs2 = bs*bs; 2238 b->mbs = B->m/bs; 2239 b->nbs = B->n/bs; 2240 b->Mbs = B->M/bs; 2241 b->Nbs = B->N/bs; 2242 2243 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2244 b->rowners[0] = 0; 2245 for (i=2; i<=b->size; i++) { 2246 b->rowners[i] += b->rowners[i-1]; 2247 } 2248 b->rstart = b->rowners[b->rank]; 2249 b->rend = b->rowners[b->rank+1]; 2250 2251 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2252 b->cowners[0] = 0; 2253 for (i=2; i<=b->size; i++) { 2254 b->cowners[i] += b->cowners[i-1]; 2255 } 2256 b->cstart = b->cowners[b->rank]; 2257 b->cend = b->cowners[b->rank+1]; 2258 2259 for (i=0; i<=b->size; i++) { 2260 b->rowners_bs[i] = b->rowners[i]*bs; 2261 } 2262 b->rstart_bs = b->rstart*bs; 2263 b->rend_bs = b->rend*bs; 2264 b->cstart_bs = b->cstart*bs; 2265 b->cend_bs = b->cend*bs; 2266 2267 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2268 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2269 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2270 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2271 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2272 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2273 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2274 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2275 2276 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2277 2278 PetscFunctionReturn(0); 2279 } 2280 EXTERN_C_END 2281 2282 EXTERN_C_BEGIN 2283 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2284 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2285 EXTERN_C_END 2286 2287 /*MC 2288 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2289 2290 Options Database Keys: 2291 . 
-mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                           "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                           MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                           "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                           MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIBAIJ",
                                           MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                           "MatSetHashTableFactor_MPIBAIJ",
                                           MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
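   Example usage (a sketch only; the sizes and fill estimates below are
   hypothetical):
.vb
      Mat A;
      ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                              5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ... set entries with MatSetValuesBlocked() or MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve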
2605 2606 Level: intermediate 2607 2608 .keywords: matrix, block, aij, compressed row, sparse, parallel 2609 2610 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2611 @*/ 2612 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2613 { 2614 PetscErrorCode ierr; 2615 PetscMPIInt size; 2616 2617 PetscFunctionBegin; 2618 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2619 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2620 if (size > 1) { 2621 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2622 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2623 } else { 2624 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2625 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2626 } 2627 PetscFunctionReturn(0); 2628 } 2629 2630 #undef __FUNCT__ 2631 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2632 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2633 { 2634 Mat mat; 2635 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2636 PetscErrorCode ierr; 2637 PetscInt len=0; 2638 2639 PetscFunctionBegin; 2640 *newmat = 0; 2641 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2642 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2643 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2644 2645 mat->factor = matin->factor; 2646 mat->preallocated = PETSC_TRUE; 2647 mat->assembled = PETSC_TRUE; 2648 mat->insertmode = NOT_SET_VALUES; 2649 2650 a = (Mat_MPIBAIJ*)mat->data; 2651 mat->bs = matin->bs; 2652 a->bs2 = oldmat->bs2; 2653 a->mbs = oldmat->mbs; 2654 a->nbs = oldmat->nbs; 2655 a->Mbs = oldmat->Mbs; 2656 a->Nbs = oldmat->Nbs; 2657 2658 a->rstart = oldmat->rstart; 2659 a->rend = oldmat->rend; 2660 a->cstart = oldmat->cstart; 2661 a->cend = oldmat->cend; 2662 a->size = oldmat->size; 2663 a->rank = oldmat->rank; 2664 a->donotstash = oldmat->donotstash; 2665 a->roworiented = oldmat->roworiented; 2666 a->rowindices = 0; 2667 a->rowvalues = 0; 2668 a->getrowactive = PETSC_FALSE; 2669 a->barray = 0; 2670 a->rstart_bs = oldmat->rstart_bs; 2671 a->rend_bs = oldmat->rend_bs; 2672 a->cstart_bs = oldmat->cstart_bs; 2673 a->cend_bs = oldmat->cend_bs; 2674 2675 /* hash table stuff */ 2676 a->ht = 0; 2677 a->hd = 0; 2678 a->ht_size = 0; 2679 a->ht_flag = oldmat->ht_flag; 2680 a->ht_fact = oldmat->ht_fact; 2681 a->ht_total_ct = 0; 2682 a->ht_insert_ct = 0; 2683 2684 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2685 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2686 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2687 if (oldmat->colmap) { 2688 #if defined (PETSC_USE_CTABLE) 2689 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2690 #else 2691 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2692 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2693 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2694 #endif 2695 } else a->colmap = 0; 2696 2697 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2698 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2699 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2700 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2701 } else a->garray = 0; 2702 2703 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2704 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2705 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2706 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2707 2708 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2709 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2710 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2711 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2712 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2713 *newmat = mat; 2714 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #include "petscsys.h" 2719 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatLoad_MPIBAIJ" 2722 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2723 { 2724 Mat A; 2725 PetscErrorCode ierr; 2726 int fd; 2727 PetscInt i,nz,j,rstart,rend; 2728 PetscScalar *vals,*buf; 2729 MPI_Comm comm = ((PetscObject)viewer)->comm; 2730 MPI_Status status; 2731 PetscMPIInt rank,size,maxnz; 2732 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2733 PetscInt *locrowlens,*procsnz = 0,*browners; 2734 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2735 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2736 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2737 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2738 2739 PetscFunctionBegin; 2740 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2741 2742 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2743 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2744 if (!rank) { 2745 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2746 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2747 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2748 } 2749 2750 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2751 M = header[1]; N = header[2]; 2752 2753 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2754 2755 /* 2756 This code adds extra rows to make sure the number of rows is 2757 divisible by the blocksize 2758 */ 2759 Mbs = M/bs; 2760 extra_rows = bs - M + bs*Mbs; 2761 if (extra_rows == bs) extra_rows = 0; 2762 else Mbs++; 2763 if (extra_rows && !rank) { 2764 ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2765 } 2766 2767 /* determine ownership of all rows */ 2768 mbs = Mbs/size + ((Mbs % size) > rank); 2769 m = mbs*bs; 2770 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2771 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2772 2773 /* process 0 needs enough room for process with most rows */ 2774 if (!rank) { 2775 mmax = rowners[1]; 2776 for (i=2; i<size; i++) { 2777 mmax = PetscMax(mmax,rowners[i]); 2778 } 2779 mmax*=bs; 2780 } else mmax = m; 2781 2782 rowners[0] = 0; 2783 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2784 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2785 rstart = rowners[rank]; 2786 rend = rowners[rank+1]; 2787 2788 /* distribute row lengths to all processors */ 2789 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2790 if (!rank) { 2791 mend = m; 2792 if (size == 1) mend = mend - 
extra_rows; 2793 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2794 for (j=mend; j<m; j++) locrowlens[j] = 1; 2795 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2796 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2797 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2798 for (j=0; j<m; j++) { 2799 procsnz[0] += locrowlens[j]; 2800 } 2801 for (i=1; i<size; i++) { 2802 mend = browners[i+1] - browners[i]; 2803 if (i == size-1) mend = mend - extra_rows; 2804 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2805 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2806 /* calculate the number of nonzeros on each processor */ 2807 for (j=0; j<browners[i+1]-browners[i]; j++) { 2808 procsnz[i] += rowlengths[j]; 2809 } 2810 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2811 } 2812 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2813 } else { 2814 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2815 } 2816 2817 if (!rank) { 2818 /* determine max buffer needed and allocate it */ 2819 maxnz = procsnz[0]; 2820 for (i=1; i<size; i++) { 2821 maxnz = PetscMax(maxnz,procsnz[i]); 2822 } 2823 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2824 2825 /* read in my part of the matrix column indices */ 2826 nz = procsnz[0]; 2827 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2828 mycols = ibuf; 2829 if (size == 1) nz -= extra_rows; 2830 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2831 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2832 2833 /* read in every ones (except the last) and ship off */ 2834 for (i=1; i<size-1; i++) { 2835 nz = procsnz[i]; 2836 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2837 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2838 } 2839 /* read in the stuff for the last proc */ 2840 if (size != 1) { 2841 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2842 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2843 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2844 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2845 } 2846 ierr = PetscFree(cols);CHKERRQ(ierr); 2847 } else { 2848 /* determine buffer space needed for message */ 2849 nz = 0; 2850 for (i=0; i<m; i++) { 2851 nz += locrowlens[i]; 2852 } 2853 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2854 mycols = ibuf; 2855 /* receive message of column indices*/ 2856 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2857 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2858 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2859 } 2860 2861 /* loop over local rows, determining number of off diagonal entries */ 2862 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2863 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2864 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2865 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2866 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2867 rowcount = 0; nzcount = 0; 2868 for (i=0; i<mbs; i++) { 2869 dcount = 0; 2870 odcount = 0; 2871 for (j=0; j<bs; j++) { 2872 kmax = locrowlens[rowcount]; 2873 for (k=0; k<kmax; k++) { 2874 tmp = 
mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.
fact - factor 2975 2976 Collective on Mat 2977 2978 Level: advanced 2979 2980 Notes: 2981 This can also be set by the command line option: -mat_use_hash_table fact 2982 2983 .keywords: matrix, hashtable, factor, HT 2984 2985 .seealso: MatSetOption() 2986 @*/ 2987 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2988 { 2989 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2990 2991 PetscFunctionBegin; 2992 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2993 if (f) { 2994 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2995 } 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3001 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3002 { 3003 Mat_MPIBAIJ *baij; 3004 3005 PetscFunctionBegin; 3006 baij = (Mat_MPIBAIJ*)mat->data; 3007 baij->ht_fact = fact; 3008 PetscFunctionReturn(0); 3009 } 3010 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3013 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3014 { 3015 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3016 PetscFunctionBegin; 3017 *Ad = a->A; 3018 *Ao = a->B; 3019 *colmap = a->garray; 3020 PetscFunctionReturn(0); 3021 } 3022
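
/*
   Usage sketch for MatMPIBAIJGetSeqBAIJ() (illustrative only): the returned
   matrices and index array reference the internal storage of the parallel
   matrix and must not be destroyed or freed by the caller.

      Mat      Ad,Ao;
      PetscInt *garray;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
      ...  Ad holds the local diagonal blocks, Ao the off-diagonal blocks, and
           garray[i] is the global block column of local block column i of Ao  ...
*/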