1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 #include <petscsf.h> 5 6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 18 { 19 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt i,*idxb = 0; 22 PetscScalar *va,*vb; 23 Vec vtmp; 24 25 PetscFunctionBegin; 26 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 27 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 28 if (idx) { 29 for (i=0; i<A->rmap->n; i++) { 30 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 31 } 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++) { 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 41 va[i] = vb[i]; 42 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 43 } 44 } 45 46 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 47 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 48 ierr = PetscFree(idxb);CHKERRQ(ierr); 49 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 62 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scalable way since the 83 length of colmap equals the global matrix length. 
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++) { 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 110 { \ 111 \ 112 brow = row/bs; \ 113 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 114 rmax = aimax[brow]; nrow = ailen[brow]; \ 115 bcol = col/bs; \ 116 ridx = row % bs; cidx = col % bs; \ 117 low = 0; high = nrow; \ 118 while (high-low > 3) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > bcol) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > bcol) break; \ 125 if (rp[_i] == bcol) { \ 126 bap = ap + bs2*_i + bs*cidx + ridx; \ 127 if (addv == ADD_VALUES) *bap += value; \ 128 else *bap = value; \ 129 goto a_noinsert; \ 130 } \ 131 } \ 132 if (a->nonew == 1) goto a_noinsert; \ 133 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 134 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 135 N = nrow++ - 1; \ 136 /* shift up all the later entries in this row */ \ 137 for (ii=N; ii>=_i; ii--) { \ 138 rp[ii+1] = rp[ii]; \ 139 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 140 } \ 141 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 142 rp[_i] = bcol; \ 143 ap[bs2*_i + bs*cidx + ridx] = value; \ 144 a_noinsert:; \ 145 ailen[brow] = nrow; \ 146 } 147 148 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 149 { \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for (_i=low; _i<high; _i++) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 172 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 
rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 if (v) PetscValidScalarPointer(v,6); 216 for (i=0; i<m; i++) { 217 if (im[i] < 0) continue; 218 #if defined(PETSC_USE_DEBUG) 219 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for (j=0; j<n; j++) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig) { 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } else if (in[j] < 0) continue; 231 #if defined(PETSC_USE_DEBUG) 232 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined(PETSC_USE_CTABLE) 240 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 241 col = col - 1; 242 #else 243 col = baij->colmap[in[j]/bs] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 246 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 249 B = baij->B; 250 b = (Mat_SeqBAIJ*)(B)->data; 251 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 252 ba =b->a; 253 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 254 else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; 257 else value = v[i+j*m]; 258 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 259 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 260 } 261 } 262 } else { 263 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 264 if (!baij->donotstash) { 265 mat->assembled = PETSC_FALSE; 266 if (roworiented) { 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 268 } else { 269 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 270 } 271 } 272 } 273 } 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 280 { 281 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 282 const PetscScalar *value; 283 MatScalar *barray = baij->barray; 284 PetscBool roworiented = baij->roworiented; 285 PetscErrorCode ierr; 286 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 287 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 288 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 289 290 PetscFunctionBegin; 291 if (!barray) { 292 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 293 baij->barray = barray; 294 } 295 296 if (roworiented) stepval = (n-1)*bs; 297 else stepval = (m-1)*bs; 298 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if ((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 320 barray += bs; 321 } 322 barray -= bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend) { 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined(PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined(PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 355 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 358 } else col = in[j]; 359 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 360 } 361 } 362 } else { 363 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 364 if (!baij->donotstash) { 365 if (roworiented) { 366 ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 367 } else { 368 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } 370 } 371 } 372 } 373 PetscFunctionReturn(0); 374 } 375 376 #define HASH_KEY 0.6180339887 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 383 { 384 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 385 PetscBool roworiented = baij->roworiented; 386 PetscErrorCode ierr; 387 PetscInt i,j,row,col; 388 PetscInt rstart_orig=mat->rmap->rstart; 389 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 390 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 391 PetscReal tmp; 392 MatScalar **HD = baij->hd,value; 393 #if defined(PETSC_USE_DEBUG) 394 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 395 #endif 396 397 PetscFunctionBegin; 398 if (v) PetscValidScalarPointer(v,6); 399 for (i=0; i<m; i++) { 400 #if defined(PETSC_USE_DEBUG) 401 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 402 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 403 #endif 404 row = im[i]; 405 if (row >= rstart_orig && row < rend_orig) { 406 for (j=0; j<n; j++) { 407 col = in[j]; 408 if (roworiented) value = v[i*n+j]; 409 else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 423 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 424 } 425 } 426 #else 427 if (HT[idx] != key) { 428 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 429 if (idx == size) { 430 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 431 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else if (!baij->donotstash) { 440 if (roworiented) { 441 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 442 } else { 443 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 444 } 445 } 446 } 447 #if defined(PETSC_USE_DEBUG) 448 baij->ht_total_ct = total_ct; 449 baij->ht_insert_ct = insert_ct; 450 #endif 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 457 { 458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 
459 PetscBool roworiented = baij->roworiented; 460 PetscErrorCode ierr; 461 PetscInt i,j,ii,jj,row,col; 462 PetscInt rstart=baij->rstartbs; 463 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 464 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 465 PetscReal tmp; 466 MatScalar **HD = baij->hd,*baij_a; 467 const PetscScalar *v_t,*value; 468 #if defined(PETSC_USE_DEBUG) 469 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 470 #endif 471 472 PetscFunctionBegin; 473 if (roworiented) stepval = (n-1)*bs; 474 else stepval = (m-1)*bs; 475 476 for (i=0; i<m; i++) { 477 #if defined(PETSC_USE_DEBUG) 478 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 479 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 480 #endif 481 row = im[i]; 482 v_t = v + i*nbs2; 483 if (row >= rstart && row < rend) { 484 for (j=0; j<n; j++) { 485 col = in[j]; 486 487 /* Look up into the Hash Table */ 488 key = row*Nbs+col+1; 489 h1 = HASH(size,key,tmp); 490 491 idx = h1; 492 #if defined(PETSC_USE_DEBUG) 493 total_ct++; 494 insert_ct++; 495 if (HT[idx] != key) { 496 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == size) { 498 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 499 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 500 } 501 } 502 #else 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 507 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 508 } 509 } 510 #endif 511 baij_a = HD[idx]; 512 if (roworiented) { 513 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 514 /* value = v + (i*(stepval+bs)+j)*bs; */ 515 value = v_t; 516 v_t += bs; 517 if (addv == ADD_VALUES) { 518 for (ii=0; ii<bs; ii++,value+=stepval) { 519 for (jj=ii; jj<bs2; jj+=bs) { 520 baij_a[jj] += *value++; 521 } 522 } 523 } else { 524 for (ii=0; ii<bs; ii++,value+=stepval) { 525 for (jj=ii; jj<bs2; jj+=bs) { 526 baij_a[jj] = *value++; 527 } 528 } 529 } 530 } else { 531 value = v + j*(stepval+bs)*bs + i*bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 534 for (jj=0; jj<bs; jj++) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 540 for (jj=0; jj<bs; jj++) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } 546 } 547 } else { 548 if (!baij->donotstash) { 549 if (roworiented) { 550 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 #if defined(PETSC_USE_DEBUG) 558 baij->ht_total_ct = total_ct; 559 baij->ht_insert_ct = insert_ct; 560 #endif 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatGetValues_MPIBAIJ" 566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 567 { 568 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 569 PetscErrorCode ierr; 570 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 571 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 572 573 
PetscFunctionBegin; 574 for (i=0; i<m; i++) { 575 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 576 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 577 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 578 row = idxm[i] - bsrstart; 579 for (j=0; j<n; j++) { 580 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 581 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 582 if (idxn[j] >= bscstart && idxn[j] < bscend) { 583 col = idxn[j] - bscstart; 584 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 585 } else { 586 if (!baij->colmap) { 587 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 591 data--; 592 #else 593 data = baij->colmap[idxn[j]/bs]-1; 594 #endif 595 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 596 else { 597 col = data + idxn[j]%bs; 598 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } 600 } 601 } 602 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatNorm_MPIBAIJ" 609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 612 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 613 PetscErrorCode ierr; 614 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 615 PetscReal sum = 0.0; 616 MatScalar *v; 617 618 PetscFunctionBegin; 619 if (baij->size == 1) { 620 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 621 } else { 622 if (type == NORM_FROBENIUS) { 623 v = amat->a; 624 nz = amat->nz*bs2; 625 for (i=0; i<nz; i++) { 626 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 627 } 628 v = bmat->a; 629 nz = bmat->nz*bs2; 630 for (i=0; i<nz; i++) { 631 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 632 } 633 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 634 *nrm = PetscSqrtReal(*nrm); 635 } else if (type == NORM_1) { /* max column sum */ 636 PetscReal *tmp,*tmp2; 637 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 638 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 639 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 640 v = amat->a; jj = amat->j; 641 for (i=0; i<amat->nz; i++) { 642 for (j=0; j<bs; j++) { 643 col = bs*(cstart + *jj) + j; /* column index */ 644 for (row=0; row<bs; row++) { 645 tmp[col] += PetscAbsScalar(*v); v++; 646 } 647 } 648 jj++; 649 } 650 v = bmat->a; jj = bmat->j; 651 for (i=0; i<bmat->nz; i++) { 652 for (j=0; j<bs; j++) { 653 col = bs*garray[*jj] + j; 654 for (row=0; row<bs; row++) { 655 tmp[col] += PetscAbsScalar(*v); v++; 656 } 657 } 658 jj++; 659 } 660 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 661 *nrm = 0.0; 662 for (j=0; j<mat->cmap->N; j++) { 663 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 664 } 665 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 666 } else if (type == NORM_INFINITY) { /* max row sum */ 667 PetscReal *sums; 668 ierr = 
PetscMalloc1(bs,&sums);CHKERRQ(ierr); 669 sum = 0.0; 670 for (j=0; j<amat->mbs; j++) { 671 for (row=0; row<bs; row++) sums[row] = 0.0; 672 v = amat->a + bs2*amat->i[j]; 673 nz = amat->i[j+1]-amat->i[j]; 674 for (i=0; i<nz; i++) { 675 for (col=0; col<bs; col++) { 676 for (row=0; row<bs; row++) { 677 sums[row] += PetscAbsScalar(*v); v++; 678 } 679 } 680 } 681 v = bmat->a + bs2*bmat->i[j]; 682 nz = bmat->i[j+1]-bmat->i[j]; 683 for (i=0; i<nz; i++) { 684 for (col=0; col<bs; col++) { 685 for (row=0; row<bs; row++) { 686 sums[row] += PetscAbsScalar(*v); v++; 687 } 688 } 689 } 690 for (row=0; row<bs; row++) { 691 if (sums[row] > sum) sum = sums[row]; 692 } 693 } 694 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 695 ierr = PetscFree(sums);CHKERRQ(ierr); 696 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 /* 702 Creates the hash table, and sets the table 703 This table is created only once. 704 If new entried need to be added to the matrix 705 then the hash table has to be destroyed and 706 recreated. 707 */ 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 711 { 712 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 713 Mat A = baij->A,B=baij->B; 714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 715 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 716 PetscErrorCode ierr; 717 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 718 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 719 PetscInt *HT,key; 720 MatScalar **HD; 721 PetscReal tmp; 722 #if defined(PETSC_USE_INFO) 723 PetscInt ct=0,max=0; 724 #endif 725 726 PetscFunctionBegin; 727 if (baij->ht) PetscFunctionReturn(0); 728 729 baij->ht_size = (PetscInt)(factor*nz); 730 ht_size = baij->ht_size; 731 732 /* Allocate Memory for Hash Table */ 733 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 734 HD = baij->hd; 735 HT = baij->ht; 736 737 /* Loop Over A */ 738 for (i=0; i<a->mbs; i++) { 739 for (j=ai[i]; j<ai[i+1]; j++) { 740 row = i+rstart; 741 col = aj[j]+cstart; 742 743 key = row*Nbs + col + 1; 744 h1 = HASH(ht_size,key,tmp); 745 for (k=0; k<ht_size; k++) { 746 if (!HT[(h1+k)%ht_size]) { 747 HT[(h1+k)%ht_size] = key; 748 HD[(h1+k)%ht_size] = a->a + j*bs2; 749 break; 750 #if defined(PETSC_USE_INFO) 751 } else { 752 ct++; 753 #endif 754 } 755 } 756 #if defined(PETSC_USE_INFO) 757 if (k> max) max = k; 758 #endif 759 } 760 } 761 /* Loop Over B */ 762 for (i=0; i<b->mbs; i++) { 763 for (j=bi[i]; j<bi[i+1]; j++) { 764 row = i+rstart; 765 col = garray[bj[j]]; 766 key = row*Nbs + col + 1; 767 h1 = HASH(ht_size,key,tmp); 768 for (k=0; k<ht_size; k++) { 769 if (!HT[(h1+k)%ht_size]) { 770 HT[(h1+k)%ht_size] = key; 771 HD[(h1+k)%ht_size] = b->a + j*bs2; 772 break; 773 #if defined(PETSC_USE_INFO) 774 } else { 775 ct++; 776 #endif 777 } 778 } 779 #if defined(PETSC_USE_INFO) 780 if (k> max) max = k; 781 #endif 782 } 783 } 784 785 /* Print Summary */ 786 #if defined(PETSC_USE_INFO) 787 for (i=0,j=0; i<ht_size; i++) { 788 if (HT[i]) j++; 789 } 790 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 791 #endif 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 797 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 InsertMode addv; 803 804 PetscFunctionBegin; 805 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 806 807 /* make sure all processors are either in INSERTMODE or ADDMODE */ 808 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 809 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 810 mat->insertmode = addv; /* in case this processor had no cache */ 811 812 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 813 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 815 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 816 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 817 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 823 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 826 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 827 PetscErrorCode ierr; 828 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 829 PetscInt *row,*col; 830 PetscBool r1,r2,r3,other_disassembled; 831 MatScalar *val; 832 InsertMode addv = mat->insertmode; 833 PetscMPIInt n; 834 835 PetscFunctionBegin; 836 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 837 if (!baij->donotstash && !mat->nooffprocentries) { 838 while (1) { 839 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 840 if (!flg) break; 841 842 for (i=0; i<n;) { 843 /* Now identify the consecutive vals belonging to the same row */ 844 for (j=i,rstart=row[j]; j<n; j++) { 845 if (row[j] != rstart) break; 846 } 847 if (j < n) ncols = j-i; 848 else ncols = n-i; 849 /* Now assemble all these values with a single function call */ 850 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 851 i = j; 852 } 853 } 854 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 855 /* Now process the block-stash. 
Since the values are stashed column-oriented, 856 set the roworiented flag to column oriented, and after MatSetValues() 857 restore the original flags */ 858 r1 = baij->roworiented; 859 r2 = a->roworiented; 860 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 861 862 baij->roworiented = PETSC_FALSE; 863 a->roworiented = PETSC_FALSE; 864 865 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 866 while (1) { 867 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 868 if (!flg) break; 869 870 for (i=0; i<n;) { 871 /* Now identify the consecutive vals belonging to the same row */ 872 for (j=i,rstart=row[j]; j<n; j++) { 873 if (row[j] != rstart) break; 874 } 875 if (j < n) ncols = j-i; 876 else ncols = n-i; 877 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 878 i = j; 879 } 880 } 881 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 882 883 baij->roworiented = r1; 884 a->roworiented = r2; 885 886 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 887 } 888 889 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 890 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 891 892 /* determine if any processor has disassembled, if so we must 893 also disassemble ourselfs, in order that we may reassemble. */ 894 /* 895 if nonzero structure of submatrix B cannot change then we know that 896 no processor disassembled thus we can skip this stuff 897 */ 898 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 899 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 900 if (mat->was_assembled && !other_disassembled) { 901 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 902 } 903 } 904 905 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 906 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 907 } 908 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 910 911 #if defined(PETSC_USE_INFO) 912 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 913 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 914 915 baij->ht_total_ct = 0; 916 baij->ht_insert_ct = 0; 917 } 918 #endif 919 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 920 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 921 922 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 923 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 924 } 925 926 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 927 928 baij->rowvalues = 0; 929 PetscFunctionReturn(0); 930 } 931 932 #include <petscdraw.h> 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 935 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 936 { 937 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 938 PetscErrorCode ierr; 939 PetscMPIInt size = baij->size,rank = baij->rank; 940 PetscInt bs = mat->rmap->bs; 941 PetscBool iascii,isdraw; 942 PetscViewer sviewer; 943 PetscViewerFormat format; 944 945 PetscFunctionBegin; 946 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 947 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 948 if (iascii) { 949 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 950 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 951 MatInfo info; 952 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 953 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 956 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 957 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 959 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 961 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 962 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 963 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 964 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } else if (format == PETSC_VIEWER_ASCII_INFO) { 967 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 970 PetscFunctionReturn(0); 971 } 972 } 973 974 if (isdraw) { 975 PetscDraw draw; 976 PetscBool isnull; 977 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 978 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 979 } 980 981 if (size == 1) { 982 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 983 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 984 } else { 985 /* assemble the entire matrix onto first processor. */ 986 Mat A; 987 Mat_SeqBAIJ *Aloc; 988 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 989 MatScalar *a; 990 991 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 992 /* Perhaps this should be the type of mat? 
*/ 993 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 994 if (!rank) { 995 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 996 } else { 997 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 998 } 999 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1000 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1001 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1002 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1003 1004 /* copy over the A part */ 1005 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1006 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1007 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1008 1009 for (i=0; i<mbs; i++) { 1010 rvals[0] = bs*(baij->rstartbs + i); 1011 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1012 for (j=ai[i]; j<ai[i+1]; j++) { 1013 col = (baij->cstartbs+aj[j])*bs; 1014 for (k=0; k<bs; k++) { 1015 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1016 col++; a += bs; 1017 } 1018 } 1019 } 1020 /* copy over the B part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 for (i=0; i<mbs; i++) { 1024 rvals[0] = bs*(baij->rstartbs + i); 1025 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1026 for (j=ai[i]; j<ai[i+1]; j++) { 1027 col = baij->garray[aj[j]]*bs; 1028 for (k=0; k<bs; k++) { 1029 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1030 col++; a += bs; 1031 } 1032 } 1033 } 1034 ierr = PetscFree(rvals);CHKERRQ(ierr); 1035 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1036 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 /* 1038 Everyone has to call to draw the matrix since the graphics waits are 1039 synchronized across all processors that share the PetscDraw object 1040 */ 1041 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1042 if (!rank) { 1043 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1044 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1045 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1046 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1047 } 1048 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1049 ierr = MatDestroy(&A);CHKERRQ(ierr); 1050 } 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1056 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1057 { 1058 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1059 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1060 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1061 PetscErrorCode ierr; 1062 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1063 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1064 int fd; 1065 PetscScalar *column_values; 1066 FILE *file; 1067 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1068 PetscInt message_count,flowcontrolcount; 1069 1070 PetscFunctionBegin; 1071 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1072 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1073 nz = bs2*(A->nz + B->nz); 1074 rlen = mat->rmap->n; 1075 if (!rank) { 1076 header[0] = MAT_FILE_CLASSID; 1077 header[1] = 
mat->rmap->N; 1078 header[2] = mat->cmap->N; 1079 1080 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1081 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1082 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1083 /* get largest number of rows any processor has */ 1084 range = mat->rmap->range; 1085 for (i=1; i<size; i++) { 1086 rlen = PetscMax(rlen,range[i+1] - range[i]); 1087 } 1088 } else { 1089 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1090 } 1091 1092 ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr); 1093 /* compute lengths of each row */ 1094 for (i=0; i<a->mbs; i++) { 1095 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1096 } 1097 /* store the row lengths to the file */ 1098 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1099 if (!rank) { 1100 MPI_Status status; 1101 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1102 rlen = (range[1] - range[0])/bs; 1103 for (i=0; i<rlen; i++) { 1104 for (j=0; j<bs; j++) { 1105 row_lens[i*bs+j] = bs*crow_lens[i]; 1106 } 1107 } 1108 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1109 for (i=1; i<size; i++) { 1110 rlen = (range[i+1] - range[i])/bs; 1111 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1112 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1113 for (k=0; k<rlen; k++) { 1114 for (j=0; j<bs; j++) { 1115 row_lens[k*bs+j] = bs*crow_lens[k]; 1116 } 1117 } 1118 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1119 } 1120 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1121 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1122 } else { 1123 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1124 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1125 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1126 } 1127 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1128 1129 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1130 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1131 the communication needed for the nonzero values */ 1132 nzmax = nz; /* space a largest processor needs */ 1133 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1134 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1135 cnt = 0; 1136 for (i=0; i<a->mbs; i++) { 1137 pcnt = cnt; 1138 for (j=B->i[i]; j<B->i[i+1]; j++) { 1139 if ((col = garray[B->j[j]]) > cstart) break; 1140 for (l=0; l<bs; l++) { 1141 column_indices[cnt++] = bs*col+l; 1142 } 1143 } 1144 for (k=A->i[i]; k<A->i[i+1]; k++) { 1145 for (l=0; l<bs; l++) { 1146 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1147 } 1148 } 1149 for (; j<B->i[i+1]; j++) { 1150 for (l=0; l<bs; l++) { 1151 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1152 } 1153 } 1154 len = cnt - pcnt; 1155 for (k=1; k<bs; k++) { 1156 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1157 cnt += len; 1158 } 1159 } 1160 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1161 1162 /* store the columns to the file */ 1163 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1164 if (!rank) { 1165 MPI_Status status; 1166 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1167 for (i=1; i<size; i++) { 1168 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1169 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1170 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1171 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1172 } 1173 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1174 } else { 1175 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1176 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1177 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1178 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1179 } 1180 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1181 1182 /* load up the numerical values */ 1183 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1184 cnt = 0; 1185 for (i=0; i<a->mbs; i++) { 1186 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1187 for (j=B->i[i]; j<B->i[i+1]; j++) { 1188 if (garray[B->j[j]] > cstart) break; 1189 for (l=0; l<bs; l++) { 1190 for (ll=0; ll<bs; ll++) { 1191 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1192 } 1193 } 1194 cnt += bs; 1195 } 1196 for (k=A->i[i]; k<A->i[i+1]; k++) { 1197 for (l=0; l<bs; l++) { 1198 for (ll=0; ll<bs; ll++) { 1199 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1200 } 1201 } 1202 cnt += bs; 1203 } 1204 for (; j<B->i[i+1]; j++) { 1205 for (l=0; l<bs; l++) { 1206 for (ll=0; ll<bs; ll++) { 1207 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1208 } 1209 } 1210 cnt += bs; 1211 } 1212 cnt += (bs-1)*rlen; 1213 } 1214 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1215 1216 /* store the column values to the file */ 1217 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1218 if (!rank) { 1219 MPI_Status status; 1220 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1221 for (i=1; i<size; i++) { 1222 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1223 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1224 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1225 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1226 } 1227 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1228 } else { 1229 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1230 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(column_values);CHKERRQ(ierr); 1235 1236 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1237 if (file) { 1238 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1239 } 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatView_MPIBAIJ" 1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1246 { 1247 PetscErrorCode ierr; 1248 PetscBool iascii,isdraw,issocket,isbinary; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1252 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1255 if (iascii || isdraw || issocket) { 1256 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1257 } else if (isbinary) { 1258 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1259 } 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1266 { 1267 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 #if defined(PETSC_USE_LOG) 1272 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1273 #endif 1274 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1275 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1276 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1277 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1278 #if defined(PETSC_USE_CTABLE) 1279 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1280 #else 1281 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1282 #endif 1283 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1284 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1285 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1286 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1287 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1288 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1289 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1290 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1291 1292 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1294 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatMult_MPIBAIJ" 1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1308 { 1309 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt nt; 1312 1313 PetscFunctionBegin; 1314 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1315 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1316 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1317 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1318 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1320 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1328 { 1329 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1335 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1343 { 1344 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1345 PetscErrorCode ierr; 1346 PetscBool merged; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1350 /* do nondiagonal part */ 1351 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1352 if (!merged) { 1353 /* send it on its way */ 1354 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1355 /* do local part */ 1356 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1357 /* receive remote parts: note this assumes the values are not actually */ 1358 /* inserted in yy until the next line */ 1359 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1360 } else { 1361 /* do local part */ 1362 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1363 /* send it on its way */ 1364 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1365 /* values actually were received in the Begin() 
but we need to call this nop */ 1366 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1374 { 1375 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1376 PetscErrorCode ierr; 1377 1378 PetscFunctionBegin; 1379 /* do nondiagonal part */ 1380 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1381 /* send it on its way */ 1382 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 /* do local part */ 1384 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1385 /* receive remote parts: note this assumes the values are not actually */ 1386 /* inserted in yy until the next line, which is true for my implementation*/ 1387 /* but is not perhaps always true. */ 1388 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 /* 1393 This only works correctly for square matrices where the subblock A->A is the 1394 diagonal block 1395 */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1405 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatScale_MPIBAIJ" 1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1414 PetscErrorCode ierr; 1415 1416 PetscFunctionBegin; 1417 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1418 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1425 { 1426 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1427 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1428 PetscErrorCode ierr; 1429 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1430 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1431 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1432 1433 PetscFunctionBegin; 1434 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1435 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1436 mat->getrowactive = PETSC_TRUE; 1437 1438 if (!mat->rowvalues && (idx || v)) { 1439 /* 1440 allocate enough space to hold information from the longest row. 
1441 */ 1442 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1443 PetscInt max = 1,mbs = mat->mbs,tmp; 1444 for (i=0; i<mbs; i++) { 1445 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1446 if (max < tmp) max = tmp; 1447 } 1448 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1449 } 1450 lrow = row - brstart; 1451 1452 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1453 if (!v) {pvA = 0; pvB = 0;} 1454 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1455 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1456 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1457 nztot = nzA + nzB; 1458 1459 cmap = mat->garray; 1460 if (v || idx) { 1461 if (nztot) { 1462 /* Sort by increasing column numbers, assuming A and B already sorted */ 1463 PetscInt imark = -1; 1464 if (v) { 1465 *v = v_p = mat->rowvalues; 1466 for (i=0; i<nzB; i++) { 1467 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1468 else break; 1469 } 1470 imark = i; 1471 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1472 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1473 } 1474 if (idx) { 1475 *idx = idx_p = mat->rowindices; 1476 if (imark > -1) { 1477 for (i=0; i<imark; i++) { 1478 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479 } 1480 } else { 1481 for (i=0; i<nzB; i++) { 1482 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1483 else break; 1484 } 1485 imark = i; 1486 } 1487 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1488 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1489 } 1490 } else { 1491 if (idx) *idx = 0; 1492 if (v) *v = 0; 1493 } 1494 } 1495 *nz = nztot; 1496 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1497 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1503 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1504 { 1505 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1506 1507 PetscFunctionBegin; 1508 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1509 baij->getrowactive = PETSC_FALSE; 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1515 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1516 { 1517 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1522 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1528 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1529 { 1530 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1531 Mat A = a->A,B = a->B; 1532 PetscErrorCode ierr; 1533 PetscReal isend[5],irecv[5]; 1534 1535 PetscFunctionBegin; 1536 info->block_size = (PetscReal)matin->rmap->bs; 1537 1538 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1539 1540 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1541 isend[3] = info->memory; isend[4] = info->mallocs; 1542 1543 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1544 1545 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1546 isend[3] += info->memory; 
isend[4] += info->mallocs; 1547 1548 if (flag == MAT_LOCAL) { 1549 info->nz_used = isend[0]; 1550 info->nz_allocated = isend[1]; 1551 info->nz_unneeded = isend[2]; 1552 info->memory = isend[3]; 1553 info->mallocs = isend[4]; 1554 } else if (flag == MAT_GLOBAL_MAX) { 1555 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1556 1557 info->nz_used = irecv[0]; 1558 info->nz_allocated = irecv[1]; 1559 info->nz_unneeded = irecv[2]; 1560 info->memory = irecv[3]; 1561 info->mallocs = irecv[4]; 1562 } else if (flag == MAT_GLOBAL_SUM) { 1563 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1564 1565 info->nz_used = irecv[0]; 1566 info->nz_allocated = irecv[1]; 1567 info->nz_unneeded = irecv[2]; 1568 info->memory = irecv[3]; 1569 info->mallocs = irecv[4]; 1570 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1571 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1572 info->fill_ratio_needed = 0; 1573 info->factor_mallocs = 0; 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1579 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1580 { 1581 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 switch (op) { 1586 case MAT_NEW_NONZERO_LOCATIONS: 1587 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1588 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1589 case MAT_KEEP_NONZERO_PATTERN: 1590 case MAT_NEW_NONZERO_LOCATION_ERR: 1591 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1592 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1593 break; 1594 case MAT_ROW_ORIENTED: 1595 a->roworiented = flg; 1596 1597 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1598 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1599 break; 1600 case MAT_NEW_DIAGONALS: 1601 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1602 break; 1603 case MAT_IGNORE_OFF_PROC_ENTRIES: 1604 a->donotstash = flg; 1605 break; 1606 case MAT_USE_HASH_TABLE: 1607 a->ht_flag = flg; 1608 break; 1609 case MAT_SYMMETRIC: 1610 case MAT_STRUCTURALLY_SYMMETRIC: 1611 case MAT_HERMITIAN: 1612 case MAT_SYMMETRY_ETERNAL: 1613 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1614 break; 1615 default: 1616 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1617 } 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1623 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1624 { 1625 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1626 Mat_SeqBAIJ *Aloc; 1627 Mat B; 1628 PetscErrorCode ierr; 1629 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1630 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1631 MatScalar *a; 1632 1633 PetscFunctionBegin; 1634 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1635 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1636 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1637 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1638 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1639 /* Do not know preallocation information, but must set block size */ 1640 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1641 } else { 
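    /* MAT_REUSE_MATRIX with an output matrix different from A: reuse the caller-supplied matrix and refill it below */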
1642 B = *matout; 1643 } 1644 1645 /* copy over the A part */ 1646 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1647 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1648 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1649 1650 for (i=0; i<mbs; i++) { 1651 rvals[0] = bs*(baij->rstartbs + i); 1652 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1653 for (j=ai[i]; j<ai[i+1]; j++) { 1654 col = (baij->cstartbs+aj[j])*bs; 1655 for (k=0; k<bs; k++) { 1656 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1657 1658 col++; a += bs; 1659 } 1660 } 1661 } 1662 /* copy over the B part */ 1663 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1664 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1665 for (i=0; i<mbs; i++) { 1666 rvals[0] = bs*(baij->rstartbs + i); 1667 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1668 for (j=ai[i]; j<ai[i+1]; j++) { 1669 col = baij->garray[aj[j]]*bs; 1670 for (k=0; k<bs; k++) { 1671 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1672 col++; 1673 a += bs; 1674 } 1675 } 1676 } 1677 ierr = PetscFree(rvals);CHKERRQ(ierr); 1678 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1679 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 1681 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1682 else { 1683 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1690 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1691 { 1692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1693 Mat a = baij->A,b = baij->B; 1694 PetscErrorCode ierr; 1695 PetscInt s1,s2,s3; 1696 1697 PetscFunctionBegin; 1698 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1699 if (rr) { 1700 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1701 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1702 /* Overlap communication with computation. 
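       The scatter of rr into lvec is started here, the diagonal block is scaled while
       the messages are in flight, and the scatter is completed below before the
       off-diagonal block is right-scaled.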
*/ 1703 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1704 } 1705 if (ll) { 1706 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1707 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1708 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1709 } 1710 /* scale the diagonal block */ 1711 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1712 1713 if (rr) { 1714 /* Do a scatter end and then right scale the off-diagonal block */ 1715 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1716 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1723 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1724 { 1725 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1726 PetscInt *owners = A->rmap->range; 1727 PetscInt n = A->rmap->n; 1728 PetscMPIInt size = l->size; 1729 PetscSF sf; 1730 PetscInt *lrows; 1731 PetscSFNode *rrows; 1732 PetscInt lastidx = -1, r, p = 0, len = 0; 1733 PetscErrorCode ierr; 1734 1735 PetscFunctionBegin; 1736 /* Create SF where leaves are input rows and roots are owned rows */ 1737 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1738 for (r = 0; r < n; ++r) lrows[r] = -1; 1739 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1740 for (r = 0; r < N; ++r) { 1741 const PetscInt idx = rows[r]; 1742 PetscBool found = PETSC_FALSE; 1743 /* Trick for efficient searching for sorted rows */ 1744 if (lastidx > idx) p = 0; 1745 lastidx = idx; 1746 for (; p < size; ++p) { 1747 if (idx >= owners[p] && idx < owners[p+1]) { 1748 rrows[r].rank = p; 1749 rrows[r].index = rows[r] - owners[p]; 1750 found = PETSC_TRUE; 1751 break; 1752 } 1753 } 1754 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1755 } 1756 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1757 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1758 /* Collect flags for rows to be zeroed */ 1759 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1760 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1761 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1762 /* Compress and put in row numbers */ 1763 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1764 /* fix right hand side if needed */ 1765 if (x && b) { 1766 const PetscScalar *xx; 1767 PetscScalar *bb; 1768 1769 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1770 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1771 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1772 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1773 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1774 } 1775 1776 /* actually zap the local rows */ 1777 /* 1778 Zero the required rows. If the "diagonal block" of the matrix 1779 is square and the user wishes to set the diagonal we use separate 1780 code so that MatSetValues() is not called for each diagonal allocating 1781 new memory, thus calling lots of mallocs and slowing things down. 
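     When diag is nonzero but the diagonal block is rectangular, the diagonal entries are
     instead inserted one at a time with MatSetValues() followed by a new assembly, which
     requires that new nonzero locations be allowed.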
1782 1783 */ 1784 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1785 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr); 1786 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1787 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr); 1788 } else if (diag != 0.0) { 1789 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1790 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1791 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1792 for (r = 0; r < len; ++r) { 1793 const PetscInt row = lrows[r] + A->rmap->rstart; 1794 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1795 } 1796 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1798 } else { 1799 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1800 } 1801 ierr = PetscFree(lrows);CHKERRQ(ierr); 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1807 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1808 { 1809 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatEqual_MPIBAIJ" 1821 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1822 { 1823 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1824 Mat a,b,c,d; 1825 PetscBool flg; 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 a = matA->A; b = matA->B; 1830 c = matB->A; d = matB->B; 1831 1832 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1833 if (flg) { 1834 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1835 } 1836 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatCopy_MPIBAIJ" 1842 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1843 { 1844 PetscErrorCode ierr; 1845 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1846 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1847 1848 PetscFunctionBegin; 1849 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
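     In that case (or when the nonzero patterns differ) the generic MatCopy_Basic() path is
     used; otherwise the diagonal and off-diagonal blocks are copied directly with MatCopy().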
*/ 1850 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1851 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1854 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1861 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1862 { 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1872 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1873 { 1874 PetscErrorCode ierr; 1875 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1876 PetscBLASInt bnz,one=1; 1877 Mat_SeqBAIJ *x,*y; 1878 1879 PetscFunctionBegin; 1880 if (str == SAME_NONZERO_PATTERN) { 1881 PetscScalar alpha = a; 1882 x = (Mat_SeqBAIJ*)xx->A->data; 1883 y = (Mat_SeqBAIJ*)yy->A->data; 1884 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1885 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1886 x = (Mat_SeqBAIJ*)xx->B->data; 1887 y = (Mat_SeqBAIJ*)yy->B->data; 1888 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1889 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1890 } else { 1891 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1898 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1899 { 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1905 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1911 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1912 { 1913 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1914 PetscErrorCode ierr; 1915 1916 PetscFunctionBegin; 1917 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1918 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1924 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1925 { 1926 PetscErrorCode ierr; 1927 IS iscol_local; 1928 PetscInt csize; 1929 1930 PetscFunctionBegin; 1931 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1932 if (call == MAT_REUSE_MATRIX) { 1933 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1934 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1935 } else { 1936 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1937 } 1938 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1939 if (call == MAT_INITIAL_MATRIX) { 1940 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1941 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 1948 /* 1949 Not great since it makes two copies of the 
submatrix, first an SeqBAIJ 1950 in local and then by concatenating the local matrices the end result. 1951 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1952 */ 1953 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1954 { 1955 PetscErrorCode ierr; 1956 PetscMPIInt rank,size; 1957 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1958 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 1959 Mat M,Mreuse; 1960 MatScalar *vwork,*aa; 1961 MPI_Comm comm; 1962 IS isrow_new, iscol_new; 1963 PetscBool idflag,allrows, allcols; 1964 Mat_SeqBAIJ *aij; 1965 1966 PetscFunctionBegin; 1967 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1968 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1969 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1970 /* The compression and expansion should be avoided. Doesn't point 1971 out errors, might change the indices, hence buggey */ 1972 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 1973 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 1974 1975 /* Check for special case: each processor gets entire matrix columns */ 1976 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 1977 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 1978 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 1979 else allcols = PETSC_FALSE; 1980 1981 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 1982 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 1983 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 1984 else allrows = PETSC_FALSE; 1985 1986 if (call == MAT_REUSE_MATRIX) { 1987 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 1988 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1989 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 1990 } else { 1991 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 1992 } 1993 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 1994 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 1995 /* 1996 m - number of local rows 1997 n - number of columns (same on all processors) 1998 rstart - first row in new global matrix generated 1999 */ 2000 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2001 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2002 m = m/bs; 2003 n = n/bs; 2004 2005 if (call == MAT_INITIAL_MATRIX) { 2006 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2007 ii = aij->i; 2008 jj = aij->j; 2009 2010 /* 2011 Determine the number of non-zeros in the diagonal and off-diagonal 2012 portions of the matrix in order to do correct preallocation 2013 */ 2014 2015 /* first get start and end of "diagonal" columns */ 2016 if (csize == PETSC_DECIDE) { 2017 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2018 if (mglobal == n*bs) { /* square matrix */ 2019 nlocal = m; 2020 } else { 2021 nlocal = n/size + ((n % size) > rank); 2022 } 2023 } else { 2024 nlocal = csize/bs; 2025 } 2026 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2027 rstart = rend - nlocal; 2028 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2029 2030 /* next, compute all the 
lengths */ 2031 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2032 for (i=0; i<m; i++) { 2033 jend = ii[i+1] - ii[i]; 2034 olen = 0; 2035 dlen = 0; 2036 for (j=0; j<jend; j++) { 2037 if (*jj < rstart || *jj >= rend) olen++; 2038 else dlen++; 2039 jj++; 2040 } 2041 olens[i] = olen; 2042 dlens[i] = dlen; 2043 } 2044 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2045 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2046 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2047 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2048 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2049 } else { 2050 PetscInt ml,nl; 2051 2052 M = *newmat; 2053 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2054 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2055 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2056 /* 2057 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2058 rather than the slower MatSetValues(). 2059 */ 2060 M->was_assembled = PETSC_TRUE; 2061 M->assembled = PETSC_FALSE; 2062 } 2063 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2064 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2065 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2066 ii = aij->i; 2067 jj = aij->j; 2068 aa = aij->a; 2069 for (i=0; i<m; i++) { 2070 row = rstart/bs + i; 2071 nz = ii[i+1] - ii[i]; 2072 cwork = jj; jj += nz; 2073 vwork = aa; aa += nz*bs*bs; 2074 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2075 } 2076 2077 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 *newmat = M; 2080 2081 /* save submatrix used in processor for next request */ 2082 if (call == MAT_INITIAL_MATRIX) { 2083 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2084 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatPermute_MPIBAIJ" 2091 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2092 { 2093 MPI_Comm comm,pcomm; 2094 PetscInt clocal_size,nrows; 2095 const PetscInt *rows; 2096 PetscMPIInt size; 2097 IS crowp,lcolp; 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2102 /* make a collective version of 'rowp' */ 2103 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2104 if (pcomm==comm) { 2105 crowp = rowp; 2106 } else { 2107 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2108 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2109 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2110 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2111 } 2112 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2113 /* make a local version of 'colp' */ 2114 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2115 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2116 if (size==1) { 2117 lcolp = colp; 2118 } else { 2119 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2120 } 2121 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2122 /* now we just get the submatrix */ 2123 ierr = MatGetLocalSize(A,PETSC_NULL,&clocal_size);CHKERRQ(ierr); 2124 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2125 /* clean up */ 2126 if (pcomm!=comm) { 2127 
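    /* free the collective copy of rowp created above when its communicator differed from the matrix communicator */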
ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2128 } 2129 if (size>1) { 2130 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2137 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2138 { 2139 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2140 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2141 2142 PetscFunctionBegin; 2143 if (nghosts) *nghosts = B->nbs; 2144 if (ghosts) *ghosts = baij->garray; 2145 PetscFunctionReturn(0); 2146 } 2147 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2150 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2151 { 2152 Mat B; 2153 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2154 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2155 Mat_SeqAIJ *b; 2156 PetscErrorCode ierr; 2157 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2158 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2159 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2160 2161 PetscFunctionBegin; 2162 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2163 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2164 2165 /* ---------------------------------------------------------------- 2166 Tell every processor the number of nonzeros per row 2167 */ 2168 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2169 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2170 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2171 } 2172 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2173 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2174 displs = recvcounts + size; 2175 for (i=0; i<size; i++) { 2176 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2177 displs[i] = A->rmap->range[i]/bs; 2178 } 2179 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2180 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2181 #else 2182 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2183 #endif 2184 /* --------------------------------------------------------------- 2185 Create the sequential matrix of the same type as the local block diagonal 2186 */ 2187 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2188 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2189 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2190 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2191 b = (Mat_SeqAIJ*)B->data; 2192 2193 /*-------------------------------------------------------------------- 2194 Copy my part of matrix column indices over 2195 */ 2196 sendcount = ad->nz + bd->nz; 2197 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2198 a_jsendbuf = ad->j; 2199 b_jsendbuf = bd->j; 2200 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2201 cnt = 0; 2202 for (i=0; i<n; i++) { 2203 2204 /* put in lower diagonal portion */ 2205 m = bd->i[i+1] - bd->i[i]; 2206 while (m > 0) { 2207 /* is it above diagonal (in bd (compressed) numbering) */ 2208 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2209 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2210 m--; 2211 } 2212 2213 /* put in diagonal portion */ 2214 for (j=ad->i[i]; 
j<ad->i[i+1]; j++) { 2215 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2216 } 2217 2218 /* put in upper diagonal portion */ 2219 while (m-- > 0) { 2220 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2221 } 2222 } 2223 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2224 2225 /*-------------------------------------------------------------------- 2226 Gather all column indices to all processors 2227 */ 2228 for (i=0; i<size; i++) { 2229 recvcounts[i] = 0; 2230 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2231 recvcounts[i] += lens[j]; 2232 } 2233 } 2234 displs[0] = 0; 2235 for (i=1; i<size; i++) { 2236 displs[i] = displs[i-1] + recvcounts[i-1]; 2237 } 2238 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2239 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2240 #else 2241 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2242 #endif 2243 /*-------------------------------------------------------------------- 2244 Assemble the matrix into useable form (note numerical values not yet set) 2245 */ 2246 /* set the b->ilen (length of each row) values */ 2247 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2248 /* set the b->i indices */ 2249 b->i[0] = 0; 2250 for (i=1; i<=A->rmap->N/bs; i++) { 2251 b->i[i] = b->i[i-1] + lens[i-1]; 2252 } 2253 ierr = PetscFree(lens);CHKERRQ(ierr); 2254 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2255 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2256 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2257 2258 if (A->symmetric) { 2259 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2260 } else if (A->hermitian) { 2261 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2262 } else if (A->structurally_symmetric) { 2263 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2264 } 2265 *newmat = B; 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatSOR_MPIBAIJ" 2271 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2272 { 2273 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2274 PetscErrorCode ierr; 2275 Vec bb1 = 0; 2276 2277 PetscFunctionBegin; 2278 if (flag == SOR_APPLY_UPPER) { 2279 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2284 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2285 } 2286 2287 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2288 if (flag & SOR_ZERO_INITIAL_GUESS) { 2289 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2290 its--; 2291 } 2292 2293 while (its--) { 2294 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2295 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2296 2297 /* update rhs: bb1 = bb - B*x */ 2298 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2299 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2300 2301 /* local sweep */ 2302 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2303 } 2304 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 2305 if (flag & SOR_ZERO_INITIAL_GUESS) { 2306 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2307 its--; 2308 } 2309 while (its--) { 2310 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2311 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2312 2313 /* update rhs: bb1 = bb - B*x */ 2314 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2315 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2316 2317 /* local sweep */ 2318 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2319 } 2320 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2321 if (flag & SOR_ZERO_INITIAL_GUESS) { 2322 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2323 its--; 2324 } 2325 while (its--) { 2326 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2327 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 2329 /* update rhs: bb1 = bb - B*x */ 2330 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2331 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2332 2333 /* local sweep */ 2334 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2335 } 2336 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2337 2338 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 #undef __FUNCT__ 2343 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2344 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2345 { 2346 PetscErrorCode ierr; 2347 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2348 PetscInt N,i,*garray = aij->garray; 2349 PetscInt ib,jb,bs = A->rmap->bs; 2350 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2351 MatScalar *a_val = a_aij->a; 2352 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2353 MatScalar *b_val = b_aij->a; 2354 PetscReal *work; 2355 2356 PetscFunctionBegin; 2357 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2358 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2359 if (type == NORM_2) { 2360 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2361 for (jb=0; jb<bs; jb++) { 2362 for (ib=0; ib<bs; ib++) { 2363 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2364 a_val++; 2365 } 2366 } 2367 } 2368 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2369 for (jb=0; jb<bs; jb++) { 2370 for (ib=0; ib<bs; ib++) { 2371 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2372 b_val++; 2373 } 2374 } 2375 } 2376 } else if (type == NORM_1) { 2377 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2378 for (jb=0; jb<bs; jb++) { 2379 for (ib=0; ib<bs; ib++) { 2380 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2381 a_val++; 2382 } 2383 } 2384 } 2385 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2386 for (jb=0; jb<bs; jb++) { 2387 for (ib=0; ib<bs; ib++) { 2388 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2389 b_val++; 2390 } 2391 } 2392 } 2393 } else if (type == NORM_INFINITY) { 2394 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2395 for (jb=0; jb<bs; jb++) { 2396 for (ib=0; ib<bs; ib++) { 2397 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2398 work[col] = 
PetscMax(PetscAbsScalar(*a_val), work[col]); 2399 a_val++; 2400 } 2401 } 2402 } 2403 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2404 for (jb=0; jb<bs; jb++) { 2405 for (ib=0; ib<bs; ib++) { 2406 int col = garray[b_aij->j[i]] * bs + jb; 2407 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2408 b_val++; 2409 } 2410 } 2411 } 2412 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2413 if (type == NORM_INFINITY) { 2414 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2415 } else { 2416 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2417 } 2418 ierr = PetscFree(work);CHKERRQ(ierr); 2419 if (type == NORM_2) { 2420 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2421 } 2422 PetscFunctionReturn(0); 2423 } 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2427 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2428 { 2429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2430 PetscErrorCode ierr; 2431 2432 PetscFunctionBegin; 2433 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 2438 /* -------------------------------------------------------------------*/ 2439 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2440 MatGetRow_MPIBAIJ, 2441 MatRestoreRow_MPIBAIJ, 2442 MatMult_MPIBAIJ, 2443 /* 4*/ MatMultAdd_MPIBAIJ, 2444 MatMultTranspose_MPIBAIJ, 2445 MatMultTransposeAdd_MPIBAIJ, 2446 0, 2447 0, 2448 0, 2449 /*10*/ 0, 2450 0, 2451 0, 2452 MatSOR_MPIBAIJ, 2453 MatTranspose_MPIBAIJ, 2454 /*15*/ MatGetInfo_MPIBAIJ, 2455 MatEqual_MPIBAIJ, 2456 MatGetDiagonal_MPIBAIJ, 2457 MatDiagonalScale_MPIBAIJ, 2458 MatNorm_MPIBAIJ, 2459 /*20*/ MatAssemblyBegin_MPIBAIJ, 2460 MatAssemblyEnd_MPIBAIJ, 2461 MatSetOption_MPIBAIJ, 2462 MatZeroEntries_MPIBAIJ, 2463 /*24*/ MatZeroRows_MPIBAIJ, 2464 0, 2465 0, 2466 0, 2467 0, 2468 /*29*/ MatSetUp_MPIBAIJ, 2469 0, 2470 0, 2471 0, 2472 0, 2473 /*34*/ MatDuplicate_MPIBAIJ, 2474 0, 2475 0, 2476 0, 2477 0, 2478 /*39*/ MatAXPY_MPIBAIJ, 2479 MatGetSubMatrices_MPIBAIJ, 2480 MatIncreaseOverlap_MPIBAIJ, 2481 MatGetValues_MPIBAIJ, 2482 MatCopy_MPIBAIJ, 2483 /*44*/ 0, 2484 MatScale_MPIBAIJ, 2485 0, 2486 0, 2487 0, 2488 /*49*/ 0, 2489 0, 2490 0, 2491 0, 2492 0, 2493 /*54*/ MatFDColoringCreate_MPIXAIJ, 2494 0, 2495 MatSetUnfactored_MPIBAIJ, 2496 MatPermute_MPIBAIJ, 2497 MatSetValuesBlocked_MPIBAIJ, 2498 /*59*/ MatGetSubMatrix_MPIBAIJ, 2499 MatDestroy_MPIBAIJ, 2500 MatView_MPIBAIJ, 2501 0, 2502 0, 2503 /*64*/ 0, 2504 0, 2505 0, 2506 0, 2507 0, 2508 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2509 0, 2510 0, 2511 0, 2512 0, 2513 /*74*/ 0, 2514 MatFDColoringApply_BAIJ, 2515 0, 2516 0, 2517 0, 2518 /*79*/ 0, 2519 0, 2520 0, 2521 0, 2522 MatLoad_MPIBAIJ, 2523 /*84*/ 0, 2524 0, 2525 0, 2526 0, 2527 0, 2528 /*89*/ 0, 2529 0, 2530 0, 2531 0, 2532 0, 2533 /*94*/ 0, 2534 0, 2535 0, 2536 0, 2537 0, 2538 /*99*/ 0, 2539 0, 2540 0, 2541 0, 2542 0, 2543 /*104*/0, 2544 MatRealPart_MPIBAIJ, 2545 MatImaginaryPart_MPIBAIJ, 2546 0, 2547 0, 2548 /*109*/0, 2549 0, 2550 0, 2551 0, 2552 0, 2553 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2554 0, 2555 MatGetGhosts_MPIBAIJ, 2556 0, 2557 0, 2558 /*119*/0, 2559 0, 2560 0, 2561 0, 2562 MatGetMultiProcBlock_MPIBAIJ, 2563 /*124*/0, 2564 MatGetColumnNorms_MPIBAIJ, 2565 MatInvertBlockDiagonal_MPIBAIJ, 2566 0, 2567 0, 2568 /*129*/ 0, 2569 0, 2570 0, 2571 0, 2572 0, 2573 /*134*/ 0, 2574 0, 2575 0, 
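                                       /* zero entries mark operations that MPIBAIJ does not provide; the numbered comments give the slot index in struct _MatOps */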
2576 0, 2577 0, 2578 /*139*/ 0, 2579 0, 2580 0, 2581 MatFDColoringSetUp_MPIXAIJ 2582 }; 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2586 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2587 { 2588 PetscFunctionBegin; 2589 *a = ((Mat_MPIBAIJ*)A->data)->A; 2590 PetscFunctionReturn(0); 2591 } 2592 2593 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2597 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2598 { 2599 PetscInt m,rstart,cstart,cend; 2600 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2601 const PetscInt *JJ =0; 2602 PetscScalar *values=0; 2603 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2604 PetscErrorCode ierr; 2605 2606 PetscFunctionBegin; 2607 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2608 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2609 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2610 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2611 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2612 m = B->rmap->n/bs; 2613 rstart = B->rmap->rstart/bs; 2614 cstart = B->cmap->rstart/bs; 2615 cend = B->cmap->rend/bs; 2616 2617 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2618 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2619 for (i=0; i<m; i++) { 2620 nz = ii[i+1] - ii[i]; 2621 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2622 nz_max = PetscMax(nz_max,nz); 2623 JJ = jj + ii[i]; 2624 for (j=0; j<nz; j++) { 2625 if (*JJ >= cstart) break; 2626 JJ++; 2627 } 2628 d = 0; 2629 for (; j<nz; j++) { 2630 if (*JJ++ >= cend) break; 2631 d++; 2632 } 2633 d_nnz[i] = d; 2634 o_nnz[i] = nz - d; 2635 } 2636 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2637 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2638 2639 values = (PetscScalar*)V; 2640 if (!values) { 2641 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2642 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2643 } 2644 for (i=0; i<m; i++) { 2645 PetscInt row = i + rstart; 2646 PetscInt ncols = ii[i+1] - ii[i]; 2647 const PetscInt *icols = jj + ii[i]; 2648 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2649 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2650 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2651 } else { /* block ordering does not match so we can only insert one block at a time. */ 2652 PetscInt j; 2653 for (j=0; j<ncols; j++) { 2654 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2655 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2656 } 2657 } 2658 } 2659 2660 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2661 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2669 /*@C 2670 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2671 (the default parallel PETSc format). 2672 2673 Collective on MPI_Comm 2674 2675 Input Parameters: 2676 + A - the matrix 2677 . bs - the block size 2678 . i - the indices into j for the start of each local row (starts with zero) 2679 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2680 - v - optional values in the matrix 2681 2682 Level: developer 2683 2684 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2685 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2686 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2687 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2688 block column and the second index is over columns within a block. 2689 2690 .keywords: matrix, aij, compressed row, sparse, parallel 2691 2692 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2693 @*/ 2694 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2695 { 2696 PetscErrorCode ierr; 2697 2698 PetscFunctionBegin; 2699 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2700 PetscValidType(B,1); 2701 PetscValidLogicalCollectiveInt(B,bs,2); 2702 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2703 PetscFunctionReturn(0); 2704 } 2705 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2708 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2709 { 2710 Mat_MPIBAIJ *b; 2711 PetscErrorCode ierr; 2712 PetscInt i; 2713 2714 PetscFunctionBegin; 2715 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2716 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2717 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2718 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2719 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2720 2721 if (d_nnz) { 2722 for (i=0; i<B->rmap->n/bs; i++) { 2723 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2724 } 2725 } 2726 if (o_nnz) { 2727 for (i=0; i<B->rmap->n/bs; i++) { 2728 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2729 } 2730 } 2731 2732 b = (Mat_MPIBAIJ*)B->data; 2733 b->bs2 = bs*bs; 2734 b->mbs = B->rmap->n/bs; 2735 b->nbs = B->cmap->n/bs; 2736 b->Mbs = B->rmap->N/bs; 2737 b->Nbs 
= B->cmap->N/bs; 2738 2739 for (i=0; i<=b->size; i++) { 2740 b->rangebs[i] = B->rmap->range[i]/bs; 2741 } 2742 b->rstartbs = B->rmap->rstart/bs; 2743 b->rendbs = B->rmap->rend/bs; 2744 b->cstartbs = B->cmap->rstart/bs; 2745 b->cendbs = B->cmap->rend/bs; 2746 2747 if (!B->preallocated) { 2748 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2749 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2750 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2751 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2752 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2753 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2754 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2755 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2756 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2757 } 2758 2759 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2760 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2761 B->preallocated = PETSC_TRUE; 2762 PetscFunctionReturn(0); 2763 } 2764 2765 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2766 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2770 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2771 { 2772 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2773 PetscErrorCode ierr; 2774 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2775 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2776 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2777 2778 PetscFunctionBegin; 2779 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2780 ii[0] = 0; 2781 for (i=0; i<M; i++) { 2782 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2783 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2784 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2785 /* remove one from count of matrix has diagonal */ 2786 for (j=id[i]; j<id[i+1]; j++) { 2787 if (jd[j] == i) {ii[i+1]--;break;} 2788 } 2789 } 2790 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2791 cnt = 0; 2792 for (i=0; i<M; i++) { 2793 for (j=io[i]; j<io[i+1]; j++) { 2794 if (garray[jo[j]] > rstart) break; 2795 jj[cnt++] = garray[jo[j]]; 2796 } 2797 for (k=id[i]; k<id[i+1]; k++) { 2798 if (jd[k] != i) { 2799 jj[cnt++] = rstart + jd[k]; 2800 } 2801 } 2802 for (; j<io[i+1]; j++) { 2803 jj[cnt++] = garray[jo[j]]; 2804 } 2805 } 2806 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2807 PetscFunctionReturn(0); 2808 } 2809 2810 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2811 2812 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2816 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2817 { 2818 PetscErrorCode ierr; 2819 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2820 Mat B; 2821 Mat_MPIAIJ *b; 2822 2823 PetscFunctionBegin; 2824 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2825 2826 
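  /* Strategy: create an MPIAIJ matrix with the same parallel layout, discard its empty local
     blocks, and replace them with SeqAIJ conversions of this matrix's diagonal and off-diagonal
     SeqBAIJ blocks. MatDisAssemble_MPIBAIJ() is called first so that the off-diagonal block is
     expressed in global column numbering before it is converted and the result reassembled. */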
ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2827 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2828 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2829 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2830 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2831 b = (Mat_MPIAIJ*) B->data; 2832 2833 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2834 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2835 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2836 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2837 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2838 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2842 if (reuse == MAT_REUSE_MATRIX) { 2843 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2844 } else { 2845 *newmat = B; 2846 } 2847 PetscFunctionReturn(0); 2848 } 2849 2850 #if defined(PETSC_HAVE_MUMPS) 2851 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2852 #endif 2853 2854 /*MC 2855 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2856 2857 Options Database Keys: 2858 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2859 . -mat_block_size <bs> - set the blocksize used to store the matrix 2860 - -mat_use_hash_table <fact> 2861 2862 Level: beginner 2863 2864 .seealso: MatCreateMPIBAIJ 2865 M*/ 2866 2867 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2868 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "MatCreate_MPIBAIJ" 2871 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2872 { 2873 Mat_MPIBAIJ *b; 2874 PetscErrorCode ierr; 2875 PetscBool flg; 2876 2877 PetscFunctionBegin; 2878 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2879 B->data = (void*)b; 2880 2881 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2882 B->assembled = PETSC_FALSE; 2883 2884 B->insertmode = NOT_SET_VALUES; 2885 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2886 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2887 2888 /* build local table of row and column ownerships */ 2889 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 2890 2891 /* build cache for off array entries formed */ 2892 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2893 2894 b->donotstash = PETSC_FALSE; 2895 b->colmap = NULL; 2896 b->garray = NULL; 2897 b->roworiented = PETSC_TRUE; 2898 2899 /* stuff used in block assembly */ 2900 b->barray = 0; 2901 2902 /* stuff used for matrix vector multiply */ 2903 b->lvec = 0; 2904 b->Mvctx = 0; 2905 2906 /* stuff for MatGetRow() */ 2907 b->rowindices = 0; 2908 b->rowvalues = 0; 2909 b->getrowactive = PETSC_FALSE; 2910 2911 /* hash table stuff */ 2912 b->ht = 0; 2913 b->hd = 0; 2914 b->ht_size = 0; 2915 b->ht_flag = PETSC_FALSE; 2916 b->ht_fact = 0; 2917 b->ht_total_ct = 0; 2918 b->ht_insert_ct = 0; 2919 2920 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2921 b->ijonly = PETSC_FALSE; 2922 2923 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2924 ierr = 
PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2925 if (flg) { 2926 PetscReal fact = 1.39; 2927 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2928 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2929 if (fact <= 1.0) fact = 1.39; 2930 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2931 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2932 } 2933 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2934 2935 #if defined(PETSC_HAVE_MUMPS) 2936 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2937 #endif 2938 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2939 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2940 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2941 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2942 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2943 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2944 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2945 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2947 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2948 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 2949 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2950 PetscFunctionReturn(0); 2951 } 2952 2953 /*MC 2954 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2955 2956 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2957 and MATMPIBAIJ otherwise. 2958 2959 Options Database Keys: 2960 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2961 2962 Level: beginner 2963 2964 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2965 M*/ 2966 2967 #undef __FUNCT__ 2968 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2969 /*@C 2970 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2971 (block compressed row). For good matrix assembly performance 2972 the user should preallocate the matrix storage by setting the parameters 2973 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2974 performance can be increased by more than a factor of 50. 2975 2976 Collective on Mat 2977 2978 Input Parameters: 2979 + A - the matrix 2980 . bs - size of block 2981 . d_nz - number of block nonzeros per block row in diagonal portion of local 2982 submatrix (same for all local rows) 2983 . 
d_nnz - array containing the number of block nonzeros in the various block rows
         in the diagonal portion of the local submatrix (possibly different for each block
         row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
         set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
         off-diagonal portion of the local submatrix (possibly different for
         each block row) or NULL.

   If an *_nnz parameter is given, the corresponding *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
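   A minimal usage sketch (the sizes and nonzero counts are illustrative, "comm" stands
   for any MPI communicator, and error checking is omitted): preallocate at most 3 blocks
   per block row in the diagonal portion and 1 in the off-diagonal portion, then insert
   and assemble as usual.

.vb
   Mat      A;
   PetscInt bs = 2, mlocal = 10;

   MatCreate(comm,&A);
   MatSetSizes(A,bs*mlocal,bs*mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatSetFromOptions(A);
   MatMPIBAIJSetPreallocation(A,bs,3,NULL,1,NULL);
   ... insert values with MatSetValuesBlocked() or MatSetValues() ...
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve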
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If an *_nnz parameter is given, the corresponding *_nz parameter is ignored.

   A nonzero block is any block that has one or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
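   As a rough sketch of the recommended paradigm (illustrative block size and
   preallocation counts, "comm" is any MPI communicator, error checking omitted),
   the direct call

.vb
   MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);
.ve

   corresponds roughly to the sequence

.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATBAIJ);
   MatMPIBAIJSetPreallocation(A,2,5,NULL,2,NULL);
   MatSeqBAIJSetPreallocation(A,2,5,NULL);
.ve

   where the MPI variant of the preallocation call takes effect on communicators with
   more than one process and the sequential variant on a single process.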
3117 3118 Storage Information: 3119 For a square global matrix we define each processor's diagonal portion 3120 to be its local rows and the corresponding columns (a square submatrix); 3121 each processor's off-diagonal portion encompasses the remainder of the 3122 local matrix (a rectangular submatrix). 3123 3124 The user can specify preallocated storage for the diagonal part of 3125 the local submatrix with either d_nz or d_nnz (not both). Set 3126 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3127 memory allocation. Likewise, specify preallocated storage for the 3128 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3129 3130 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3131 the figure below we depict these three local rows and all columns (0-11). 3132 3133 .vb 3134 0 1 2 3 4 5 6 7 8 9 10 11 3135 -------------------------- 3136 row 3 |o o o d d d o o o o o o 3137 row 4 |o o o d d d o o o o o o 3138 row 5 |o o o d d d o o o o o o 3139 -------------------------- 3140 .ve 3141 3142 Thus, any entries in the d locations are stored in the d (diagonal) 3143 submatrix, and any entries in the o locations are stored in the 3144 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3145 stored simply in the MATSEQBAIJ format for compressed row storage. 3146 3147 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3148 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3149 In general, for PDE problems in which most nonzeros are near the diagonal, 3150 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3151 or you will get TERRIBLE performance; see the users' manual chapter on 3152 matrices. 3153 3154 Level: intermediate 3155 3156 .keywords: matrix, block, aij, compressed row, sparse, parallel 3157 3158 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3159 @*/ 3160 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3161 { 3162 PetscErrorCode ierr; 3163 PetscMPIInt size; 3164 3165 PetscFunctionBegin; 3166 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3167 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3168 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3169 if (size > 1) { 3170 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3171 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3172 } else { 3173 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3174 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3175 } 3176 PetscFunctionReturn(0); 3177 } 3178 3179 #undef __FUNCT__ 3180 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3181 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3182 { 3183 Mat mat; 3184 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3185 PetscErrorCode ierr; 3186 PetscInt len=0; 3187 3188 PetscFunctionBegin; 3189 *newmat = 0; 3190 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3191 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3192 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3193 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3194 3195 mat->factortype = matin->factortype; 3196 mat->preallocated = 
PETSC_TRUE; 3197 mat->assembled = PETSC_TRUE; 3198 mat->insertmode = NOT_SET_VALUES; 3199 3200 a = (Mat_MPIBAIJ*)mat->data; 3201 mat->rmap->bs = matin->rmap->bs; 3202 a->bs2 = oldmat->bs2; 3203 a->mbs = oldmat->mbs; 3204 a->nbs = oldmat->nbs; 3205 a->Mbs = oldmat->Mbs; 3206 a->Nbs = oldmat->Nbs; 3207 3208 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3209 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3210 3211 a->size = oldmat->size; 3212 a->rank = oldmat->rank; 3213 a->donotstash = oldmat->donotstash; 3214 a->roworiented = oldmat->roworiented; 3215 a->rowindices = 0; 3216 a->rowvalues = 0; 3217 a->getrowactive = PETSC_FALSE; 3218 a->barray = 0; 3219 a->rstartbs = oldmat->rstartbs; 3220 a->rendbs = oldmat->rendbs; 3221 a->cstartbs = oldmat->cstartbs; 3222 a->cendbs = oldmat->cendbs; 3223 3224 /* hash table stuff */ 3225 a->ht = 0; 3226 a->hd = 0; 3227 a->ht_size = 0; 3228 a->ht_flag = oldmat->ht_flag; 3229 a->ht_fact = oldmat->ht_fact; 3230 a->ht_total_ct = 0; 3231 a->ht_insert_ct = 0; 3232 3233 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3234 if (oldmat->colmap) { 3235 #if defined(PETSC_USE_CTABLE) 3236 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3237 #else 3238 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 3239 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3240 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3241 #endif 3242 } else a->colmap = 0; 3243 3244 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3245 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3246 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3247 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3248 } else a->garray = 0; 3249 3250 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3251 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3253 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3254 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3255 3256 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3257 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3258 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3259 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3260 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3261 *newmat = mat; 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatLoad_MPIBAIJ" 3267 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3268 { 3269 PetscErrorCode ierr; 3270 int fd; 3271 PetscInt i,nz,j,rstart,rend; 3272 PetscScalar *vals,*buf; 3273 MPI_Comm comm; 3274 MPI_Status status; 3275 PetscMPIInt rank,size,maxnz; 3276 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3277 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3278 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3279 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3280 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3281 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3282 
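  /* Outline: rank 0 reads the binary header, pads the row count to a multiple of the block size,
     and reads the row lengths, column indices, and numerical values, shipping each process its
     portion; every process then counts the diagonal and off-diagonal block columns of its block
     rows, calls MatMPIBAIJSetPreallocation(), inserts its rows with MatSetValues_MPIBAIJ(), and
     finishes with MatAssemblyBegin()/MatAssemblyEnd(). */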
3283 PetscFunctionBegin; 3284 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3285 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3286 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3287 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3288 3289 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3290 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3291 if (!rank) { 3292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3293 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3294 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3295 } 3296 3297 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3298 3299 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3300 M = header[1]; N = header[2]; 3301 3302 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3303 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3304 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3305 3306 /* If global sizes are set, check if they are consistent with that given in the file */ 3307 if (sizesset) { 3308 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3309 } 3310 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3311 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3312 3313 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3314 3315 /* 3316 This code adds extra rows to make sure the number of rows is 3317 divisible by the blocksize 3318 */ 3319 Mbs = M/bs; 3320 extra_rows = bs - M + bs*Mbs; 3321 if (extra_rows == bs) extra_rows = 0; 3322 else Mbs++; 3323 if (extra_rows && !rank) { 3324 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3325 } 3326 3327 /* determine ownership of all rows */ 3328 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3329 mbs = Mbs/size + ((Mbs % size) > rank); 3330 m = mbs*bs; 3331 } else { /* User set */ 3332 m = newmat->rmap->n; 3333 mbs = m/bs; 3334 } 3335 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3336 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3337 3338 /* process 0 needs enough room for process with most rows */ 3339 if (!rank) { 3340 mmax = rowners[1]; 3341 for (i=2; i<=size; i++) { 3342 mmax = PetscMax(mmax,rowners[i]); 3343 } 3344 mmax*=bs; 3345 } else mmax = -1; /* unused, but compiler warns anyway */ 3346 3347 rowners[0] = 0; 3348 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3349 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3350 rstart = rowners[rank]; 3351 rend = rowners[rank+1]; 3352 3353 /* distribute row lengths to all processors */ 3354 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3355 if (!rank) { 3356 mend = m; 3357 if (size == 1) mend = mend - extra_rows; 3358 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3359 for (j=mend; j<m; j++) locrowlens[j] = 1; 3360 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3361 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3362 for (j=0; j<m; 
j++) { 3363 procsnz[0] += locrowlens[j]; 3364 } 3365 for (i=1; i<size; i++) { 3366 mend = browners[i+1] - browners[i]; 3367 if (i == size-1) mend = mend - extra_rows; 3368 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3369 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3370 /* calculate the number of nonzeros on each processor */ 3371 for (j=0; j<browners[i+1]-browners[i]; j++) { 3372 procsnz[i] += rowlengths[j]; 3373 } 3374 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3375 } 3376 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3377 } else { 3378 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3379 } 3380 3381 if (!rank) { 3382 /* determine max buffer needed and allocate it */ 3383 maxnz = procsnz[0]; 3384 for (i=1; i<size; i++) { 3385 maxnz = PetscMax(maxnz,procsnz[i]); 3386 } 3387 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3388 3389 /* read in my part of the matrix column indices */ 3390 nz = procsnz[0]; 3391 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3392 mycols = ibuf; 3393 if (size == 1) nz -= extra_rows; 3394 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3395 if (size == 1) { 3396 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3397 } 3398 3399 /* read in every ones (except the last) and ship off */ 3400 for (i=1; i<size-1; i++) { 3401 nz = procsnz[i]; 3402 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3403 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3404 } 3405 /* read in the stuff for the last proc */ 3406 if (size != 1) { 3407 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3408 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3409 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3410 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3411 } 3412 ierr = PetscFree(cols);CHKERRQ(ierr); 3413 } else { 3414 /* determine buffer space needed for message */ 3415 nz = 0; 3416 for (i=0; i<m; i++) { 3417 nz += locrowlens[i]; 3418 } 3419 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3420 mycols = ibuf; 3421 /* receive message of column indices*/ 3422 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3423 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3424 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3425 } 3426 3427 /* loop over local rows, determining number of off diagonal entries */ 3428 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3429 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3430 rowcount = 0; nzcount = 0; 3431 for (i=0; i<mbs; i++) { 3432 dcount = 0; 3433 odcount = 0; 3434 for (j=0; j<bs; j++) { 3435 kmax = locrowlens[rowcount]; 3436 for (k=0; k<kmax; k++) { 3437 tmp = mycols[nzcount++]/bs; 3438 if (!mask[tmp]) { 3439 mask[tmp] = 1; 3440 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3441 else masked1[dcount++] = tmp; 3442 } 3443 } 3444 rowcount++; 3445 } 3446 3447 dlens[i] = dcount; 3448 odlens[i] = odcount; 3449 3450 /* zero out the mask elements we set */ 3451 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3452 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3453 } 3454 3455 3456 if (!sizesset) { 3457 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3458 } 3459 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3460 3461 if (!rank) { 3462 ierr = 
PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 3463 /* read in my part of the matrix numerical values */ 3464 nz = procsnz[0]; 3465 vals = buf; 3466 mycols = ibuf; 3467 if (size == 1) nz -= extra_rows; 3468 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3469 if (size == 1) { 3470 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3471 } 3472 3473 /* insert into matrix */ 3474 jj = rstart*bs; 3475 for (i=0; i<m; i++) { 3476 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3477 mycols += locrowlens[i]; 3478 vals += locrowlens[i]; 3479 jj++; 3480 } 3481 /* read in other processors (except the last one) and ship out */ 3482 for (i=1; i<size-1; i++) { 3483 nz = procsnz[i]; 3484 vals = buf; 3485 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3486 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3487 } 3488 /* the last proc */ 3489 if (size != 1) { 3490 nz = procsnz[i] - extra_rows; 3491 vals = buf; 3492 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3493 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3494 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3495 } 3496 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3497 } else { 3498 /* receive numeric values */ 3499 ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 3500 3501 /* receive message of values*/ 3502 vals = buf; 3503 mycols = ibuf; 3504 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3505 3506 /* insert into matrix */ 3507 jj = rstart*bs; 3508 for (i=0; i<m; i++) { 3509 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3510 mycols += locrowlens[i]; 3511 vals += locrowlens[i]; 3512 jj++; 3513 } 3514 } 3515 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3516 ierr = PetscFree(buf);CHKERRQ(ierr); 3517 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3518 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3519 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3520 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3521 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3522 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3523 PetscFunctionReturn(0); 3524 } 3525 3526 #undef __FUNCT__ 3527 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3528 /*@ 3529 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3530 3531 Input Parameters: 3532 . mat - the matrix 3533 . 
fact - the hash table factor 3534 3535 Not Collective, each process can use a different factor 3536 3537 Level: advanced 3538 3539 Notes: 3540 This can also be set by the command line option: -mat_use_hash_table <fact> 3541 3542 .keywords: matrix, hashtable, factor, HT 3543 3544 .seealso: MatSetOption() 3545 @*/ 3546 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3547 { 3548 PetscErrorCode ierr; 3549 3550 PetscFunctionBegin; 3551 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3552 PetscFunctionReturn(0); 3553 } 3554 3555 #undef __FUNCT__ 3556 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3557 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3558 { 3559 Mat_MPIBAIJ *baij; 3560 3561 PetscFunctionBegin; 3562 baij = (Mat_MPIBAIJ*)mat->data; 3563 baij->ht_fact = fact; 3564 PetscFunctionReturn(0); 3565 } 3566 3567 #undef __FUNCT__ 3568 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3569 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3570 { 3571 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3572 3573 PetscFunctionBegin; 3574 *Ad = a->A; 3575 *Ao = a->B; 3576 *colmap = a->garray; 3577 PetscFunctionReturn(0); 3578 } 3579 3580 /* 3581 Special version for direct calls from Fortran (to eliminate two function call overheads) 3582 */ 3583 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3584 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3585 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3586 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3587 #endif 3588 3589 #undef __FUNCT__ 3590 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3591 /*@C 3592 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3593 3594 Collective on Mat 3595 3596 Input Parameters: 3597 + mat - the matrix 3598 . min - number of input rows 3599 . im - input rows 3600 . nin - number of input columns 3601 . in - input columns 3602 . v - numerical values input 3603 - addvin - INSERT_VALUES or ADD_VALUES 3604 3605 Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
3606 3607 Level: advanced 3608 3609 .seealso: MatSetValuesBlocked() 3610 @*/ 3611 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3612 { 3613 /* convert input arguments to C version */ 3614 Mat mat = *matin; 3615 PetscInt m = *min, n = *nin; 3616 InsertMode addv = *addvin; 3617 3618 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3619 const MatScalar *value; 3620 MatScalar *barray = baij->barray; 3621 PetscBool roworiented = baij->roworiented; 3622 PetscErrorCode ierr; 3623 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3624 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3625 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3626 3627 PetscFunctionBegin; 3628 /* tasks normally handled by MatSetValuesBlocked() */ 3629 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3630 #if defined(PETSC_USE_DEBUG) 3631 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3632 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3633 #endif 3634 if (mat->assembled) { 3635 mat->was_assembled = PETSC_TRUE; 3636 mat->assembled = PETSC_FALSE; 3637 } 3638 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3639 3640 3641 if (!barray) { 3642 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3643 baij->barray = barray; 3644 } 3645 3646 if (roworiented) stepval = (n-1)*bs; 3647 else stepval = (m-1)*bs; 3648 3649 for (i=0; i<m; i++) { 3650 if (im[i] < 0) continue; 3651 #if defined(PETSC_USE_DEBUG) 3652 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3653 #endif 3654 if (im[i] >= rstart && im[i] < rend) { 3655 row = im[i] - rstart; 3656 for (j=0; j<n; j++) { 3657 /* If NumCol = 1 then a copy is not required */ 3658 if ((roworiented) && (n == 1)) { 3659 barray = (MatScalar*)v + i*bs2; 3660 } else if ((!roworiented) && (m == 1)) { 3661 barray = (MatScalar*)v + j*bs2; 3662 } else { /* Here a copy is required */ 3663 if (roworiented) { 3664 value = v + i*(stepval+bs)*bs + j*bs; 3665 } else { 3666 value = v + j*(stepval+bs)*bs + i*bs; 3667 } 3668 for (ii=0; ii<bs; ii++,value+=stepval) { 3669 for (jj=0; jj<bs; jj++) { 3670 *barray++ = *value++; 3671 } 3672 } 3673 barray -=bs2; 3674 } 3675 3676 if (in[j] >= cstart && in[j] < cend) { 3677 col = in[j] - cstart; 3678 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3679 } else if (in[j] < 0) continue; 3680 #if defined(PETSC_USE_DEBUG) 3681 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3682 #endif 3683 else { 3684 if (mat->was_assembled) { 3685 if (!baij->colmap) { 3686 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3687 } 3688 3689 #if defined(PETSC_USE_DEBUG) 3690 #if defined(PETSC_USE_CTABLE) 3691 { PetscInt data; 3692 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3693 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3694 } 3695 #else 3696 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3697 #endif 3698 #endif 3699 #if defined(PETSC_USE_CTABLE) 3700 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3701 col = (col - 1)/bs; 3702 #else 3703 col = (baij->colmap[in[j]] - 1)/bs; 3704 
#endif 3705 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3706 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3707 col = in[j]; 3708 } 3709 } else col = in[j]; 3710 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3711 } 3712 } 3713 } else { 3714 if (!baij->donotstash) { 3715 if (roworiented) { 3716 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3717 } else { 3718 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3719 } 3720 } 3721 } 3722 } 3723 3724 /* task normally handled by MatSetValuesBlocked() */ 3725 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3726 PetscFunctionReturn(0); 3727 } 3728 3729 #undef __FUNCT__ 3730 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3731 /*@ 3732 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows 3733 in standard CSR format. 3734 3735 Collective on MPI_Comm 3736 3737 Input Parameters: 3738 + comm - MPI communicator 3739 . bs - the block size, only a block size of 1 is supported 3740 . m - number of local rows (Cannot be PETSC_DECIDE) 3741 . n - This value should be the same as the local size used in creating the 3742 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3743 calculated if N is given) For square matrices n is almost always m. 3744 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3745 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3746 . i - row indices (offsets into j and a marking the start of each local row) 3747 . j - column indices 3748 - a - matrix values 3749 3750 Output Parameter: 3751 . mat - the matrix 3752 3753 Level: intermediate 3754 3755 Notes: 3756 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3757 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3758 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3759 3760 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3761 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3762 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3763 with column-major ordering within blocks. 3764 3765 The i and j indices are 0-based, and the entries of i are offsets into the local j (and a) arrays.
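   For example, a minimal sketch with bs=1 for a process that owns two rows whose nonzeros lie at
   (0,0), (0,1), and (1,1); the sizes and values here are purely illustrative, and comm and A are the
   caller's communicator and Mat:

.vb
   PetscInt    i[] = {0,2,3};           /* i[k] marks where local row k starts in j and a */
   PetscInt    j[] = {0,1,1};           /* global column indices */
   PetscScalar a[] = {4.0,-1.0,4.0};    /* one bs*bs block of values per entry of j */
   ierr = MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve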
3766 3767 .keywords: matrix, aij, compressed row, sparse, parallel 3768 3769 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3770 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3771 @*/ 3772 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3773 { 3774 PetscErrorCode ierr; 3775 3776 PetscFunctionBegin; 3777 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3778 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3779 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3780 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3781 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3782 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3783 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3784 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3785 PetscFunctionReturn(0); 3786 } 3787