1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 #include <petscsf.h> 5 6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 18 { 19 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt i,*idxb = 0; 22 PetscScalar *va,*vb; 23 Vec vtmp; 24 25 PetscFunctionBegin; 26 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 27 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 28 if (idx) { 29 for (i=0; i<A->rmap->n; i++) { 30 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 31 } 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++) { 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 41 va[i] = vb[i]; 42 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 43 } 44 } 45 46 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 47 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 48 ierr = PetscFree(idxb);CHKERRQ(ierr); 49 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 62 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scalable way since the 83 length of colmap equals the global matrix length. 
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++) { 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 110 { \ 111 \ 112 brow = row/bs; \ 113 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 114 rmax = aimax[brow]; nrow = ailen[brow]; \ 115 bcol = col/bs; \ 116 ridx = row % bs; cidx = col % bs; \ 117 low = 0; high = nrow; \ 118 while (high-low > 3) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > bcol) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > bcol) break; \ 125 if (rp[_i] == bcol) { \ 126 bap = ap + bs2*_i + bs*cidx + ridx; \ 127 if (addv == ADD_VALUES) *bap += value; \ 128 else *bap = value; \ 129 goto a_noinsert; \ 130 } \ 131 } \ 132 if (a->nonew == 1) goto a_noinsert; \ 133 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 134 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 135 N = nrow++ - 1; \ 136 /* shift up all the later entries in this row */ \ 137 for (ii=N; ii>=_i; ii--) { \ 138 rp[ii+1] = rp[ii]; \ 139 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 140 } \ 141 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 142 rp[_i] = bcol; \ 143 ap[bs2*_i + bs*cidx + ridx] = value; \ 144 a_noinsert:; \ 145 ailen[brow] = nrow; \ 146 } 147 148 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 149 { \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for (_i=low; _i<high; _i++) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 172 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 
rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined(PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba =b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray = baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) stepval = (n-1)*bs; 296 else stepval = (m-1)*bs; 297 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if ((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + (i*(stepval+bs) + j)*bs; 314 } else { 315 value = v + (j*(stepval+bs) + i)*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 318 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend) { 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } else if (in[j] < 0) continue; 328 #if defined(PETSC_USE_DEBUG) 329 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 330 #endif 331 else { 332 if (mat->was_assembled) { 333 if (!baij->colmap) { 334 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 335 } 336 337 #if defined(PETSC_USE_DEBUG) 338 #if defined(PETSC_USE_CTABLE) 339 { PetscInt data; 340 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 341 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 342 } 343 #else 344 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 345 #endif 346 #endif 347 #if defined(PETSC_USE_CTABLE) 348 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 349 col = (col - 1)/bs; 350 #else 351 col = (baij->colmap[in[j]] - 1)/bs; 352 #endif 353 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 354 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 355 col = in[j]; 356 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 357 } else col = in[j]; 358 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 359 } 360 } 361 } else { 362 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 363 if (!baij->donotstash) { 364 if (roworiented) { 365 ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 366 } else { 367 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 375 #define HASH_KEY 0.6180339887 376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 384 PetscBool roworiented = baij->roworiented; 385 PetscErrorCode ierr; 386 PetscInt i,j,row,col; 387 PetscInt rstart_orig=mat->rmap->rstart; 388 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 389 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 390 PetscReal tmp; 391 MatScalar **HD = baij->hd,value; 392 #if defined(PETSC_USE_DEBUG) 393 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 394 #endif 395 396 PetscFunctionBegin; 397 for (i=0; i<m; i++) { 398 #if defined(PETSC_USE_DEBUG) 399 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 400 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 401 #endif 402 row = im[i]; 403 if (row >= rstart_orig && row < rend_orig) { 404 for (j=0; j<n; j++) { 405 col = in[j]; 406 if (roworiented) value = v[i*n+j]; 407 else value = v[i+j*m]; 408 /* Look up PetscInto the Hash Table */ 409 key = (row/bs)*Nbs+(col/bs)+1; 410 h1 = HASH(size,key,tmp); 411 412 413 idx = h1; 414 #if defined(PETSC_USE_DEBUG) 415 insert_ct++; 416 total_ct++; 417 if (HT[idx] != key) { 418 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 419 if (idx == size) { 420 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 421 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 422 } 423 } 424 #else 425 if (HT[idx] != key) { 426 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 427 if (idx == size) { 428 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 429 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 430 } 431 } 432 #endif 433 /* A HASH table entry is found, so insert the values at the correct address */ 434 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 435 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 436 } 437 } else if (!baij->donotstash) { 438 if (roworiented) { 439 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 440 } else { 441 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 442 } 443 } 444 } 445 #if defined(PETSC_USE_DEBUG) 446 baij->ht_total_ct = total_ct; 447 baij->ht_insert_ct = insert_ct; 448 #endif 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 457 PetscBool roworiented = 
baij->roworiented; 458 PetscErrorCode ierr; 459 PetscInt i,j,ii,jj,row,col; 460 PetscInt rstart=baij->rstartbs; 461 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 462 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 463 PetscReal tmp; 464 MatScalar **HD = baij->hd,*baij_a; 465 const PetscScalar *v_t,*value; 466 #if defined(PETSC_USE_DEBUG) 467 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 468 #endif 469 470 PetscFunctionBegin; 471 if (roworiented) stepval = (n-1)*bs; 472 else stepval = (m-1)*bs; 473 474 for (i=0; i<m; i++) { 475 #if defined(PETSC_USE_DEBUG) 476 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 477 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 478 #endif 479 row = im[i]; 480 v_t = v + i*nbs2; 481 if (row >= rstart && row < rend) { 482 for (j=0; j<n; j++) { 483 col = in[j]; 484 485 /* Look up into the Hash Table */ 486 key = row*Nbs+col+1; 487 h1 = HASH(size,key,tmp); 488 489 idx = h1; 490 #if defined(PETSC_USE_DEBUG) 491 total_ct++; 492 insert_ct++; 493 if (HT[idx] != key) { 494 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 495 if (idx == size) { 496 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498 } 499 } 500 #else 501 if (HT[idx] != key) { 502 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 503 if (idx == size) { 504 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 505 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 506 } 507 } 508 #endif 509 baij_a = HD[idx]; 510 if (roworiented) { 511 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 512 /* value = v + (i*(stepval+bs)+j)*bs; */ 513 value = v_t; 514 v_t += bs; 515 if (addv == ADD_VALUES) { 516 for (ii=0; ii<bs; ii++,value+=stepval) { 517 for (jj=ii; jj<bs2; jj+=bs) { 518 baij_a[jj] += *value++; 519 } 520 } 521 } else { 522 for (ii=0; ii<bs; ii++,value+=stepval) { 523 for (jj=ii; jj<bs2; jj+=bs) { 524 baij_a[jj] = *value++; 525 } 526 } 527 } 528 } else { 529 value = v + j*(stepval+bs)*bs + i*bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 532 for (jj=0; jj<bs; jj++) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 538 for (jj=0; jj<bs; jj++) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 #if defined(PETSC_USE_DEBUG) 556 baij->ht_total_ct = total_ct; 557 baij->ht_insert_ct = insert_ct; 558 #endif 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatGetValues_MPIBAIJ" 564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 PetscErrorCode ierr; 568 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 569 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 570 571 PetscFunctionBegin; 572 for (i=0; 
i<m; i++) { 573 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 574 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 575 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 576 row = idxm[i] - bsrstart; 577 for (j=0; j<n; j++) { 578 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 579 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 580 if (idxn[j] >= bscstart && idxn[j] < bscend) { 581 col = idxn[j] - bscstart; 582 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 583 } else { 584 if (!baij->colmap) { 585 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 586 } 587 #if defined(PETSC_USE_CTABLE) 588 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 589 data--; 590 #else 591 data = baij->colmap[idxn[j]/bs]-1; 592 #endif 593 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 594 else { 595 col = data + idxn[j]%bs; 596 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 597 } 598 } 599 } 600 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 601 } 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatNorm_MPIBAIJ" 607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 608 { 609 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 610 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 611 PetscErrorCode ierr; 612 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 613 PetscReal sum = 0.0; 614 MatScalar *v; 615 616 PetscFunctionBegin; 617 if (baij->size == 1) { 618 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 619 } else { 620 if (type == NORM_FROBENIUS) { 621 v = amat->a; 622 nz = amat->nz*bs2; 623 for (i=0; i<nz; i++) { 624 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 625 } 626 v = bmat->a; 627 nz = bmat->nz*bs2; 628 for (i=0; i<nz; i++) { 629 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 630 } 631 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 632 *nrm = PetscSqrtReal(*nrm); 633 } else if (type == NORM_1) { /* max column sum */ 634 PetscReal *tmp,*tmp2; 635 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 636 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 637 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 638 v = amat->a; jj = amat->j; 639 for (i=0; i<amat->nz; i++) { 640 for (j=0; j<bs; j++) { 641 col = bs*(cstart + *jj) + j; /* column index */ 642 for (row=0; row<bs; row++) { 643 tmp[col] += PetscAbsScalar(*v); v++; 644 } 645 } 646 jj++; 647 } 648 v = bmat->a; jj = bmat->j; 649 for (i=0; i<bmat->nz; i++) { 650 for (j=0; j<bs; j++) { 651 col = bs*garray[*jj] + j; 652 for (row=0; row<bs; row++) { 653 tmp[col] += PetscAbsScalar(*v); v++; 654 } 655 } 656 jj++; 657 } 658 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 659 *nrm = 0.0; 660 for (j=0; j<mat->cmap->N; j++) { 661 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 662 } 663 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 664 } else if (type == NORM_INFINITY) { /* max row sum */ 665 PetscReal *sums; 666 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 667 sum = 0.0; 668 for 
(j=0; j<amat->mbs; j++) { 669 for (row=0; row<bs; row++) sums[row] = 0.0; 670 v = amat->a + bs2*amat->i[j]; 671 nz = amat->i[j+1]-amat->i[j]; 672 for (i=0; i<nz; i++) { 673 for (col=0; col<bs; col++) { 674 for (row=0; row<bs; row++) { 675 sums[row] += PetscAbsScalar(*v); v++; 676 } 677 } 678 } 679 v = bmat->a + bs2*bmat->i[j]; 680 nz = bmat->i[j+1]-bmat->i[j]; 681 for (i=0; i<nz; i++) { 682 for (col=0; col<bs; col++) { 683 for (row=0; row<bs; row++) { 684 sums[row] += PetscAbsScalar(*v); v++; 685 } 686 } 687 } 688 for (row=0; row<bs; row++) { 689 if (sums[row] > sum) sum = sums[row]; 690 } 691 } 692 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 693 ierr = PetscFree(sums);CHKERRQ(ierr); 694 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 695 } 696 PetscFunctionReturn(0); 697 } 698 699 /* 700 Creates the hash table, and sets the table 701 This table is created only once. 702 If new entried need to be added to the matrix 703 then the hash table has to be destroyed and 704 recreated. 705 */ 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 709 { 710 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 711 Mat A = baij->A,B=baij->B; 712 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 713 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 714 PetscErrorCode ierr; 715 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 716 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 717 PetscInt *HT,key; 718 MatScalar **HD; 719 PetscReal tmp; 720 #if defined(PETSC_USE_INFO) 721 PetscInt ct=0,max=0; 722 #endif 723 724 PetscFunctionBegin; 725 if (baij->ht) PetscFunctionReturn(0); 726 727 baij->ht_size = (PetscInt)(factor*nz); 728 ht_size = baij->ht_size; 729 730 /* Allocate Memory for Hash Table */ 731 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 732 HD = baij->hd; 733 HT = baij->ht; 734 735 /* Loop Over A */ 736 for (i=0; i<a->mbs; i++) { 737 for (j=ai[i]; j<ai[i+1]; j++) { 738 row = i+rstart; 739 col = aj[j]+cstart; 740 741 key = row*Nbs + col + 1; 742 h1 = HASH(ht_size,key,tmp); 743 for (k=0; k<ht_size; k++) { 744 if (!HT[(h1+k)%ht_size]) { 745 HT[(h1+k)%ht_size] = key; 746 HD[(h1+k)%ht_size] = a->a + j*bs2; 747 break; 748 #if defined(PETSC_USE_INFO) 749 } else { 750 ct++; 751 #endif 752 } 753 } 754 #if defined(PETSC_USE_INFO) 755 if (k> max) max = k; 756 #endif 757 } 758 } 759 /* Loop Over B */ 760 for (i=0; i<b->mbs; i++) { 761 for (j=bi[i]; j<bi[i+1]; j++) { 762 row = i+rstart; 763 col = garray[bj[j]]; 764 key = row*Nbs + col + 1; 765 h1 = HASH(ht_size,key,tmp); 766 for (k=0; k<ht_size; k++) { 767 if (!HT[(h1+k)%ht_size]) { 768 HT[(h1+k)%ht_size] = key; 769 HD[(h1+k)%ht_size] = b->a + j*bs2; 770 break; 771 #if defined(PETSC_USE_INFO) 772 } else { 773 ct++; 774 #endif 775 } 776 } 777 #if defined(PETSC_USE_INFO) 778 if (k> max) max = k; 779 #endif 780 } 781 } 782 783 /* Print Summary */ 784 #if defined(PETSC_USE_INFO) 785 for (i=0,j=0; i<ht_size; i++) { 786 if (HT[i]) j++; 787 } 788 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 789 #endif 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 796 { 797 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 798 PetscErrorCode ierr; 799 PetscInt nstash,reallocs; 800 InsertMode addv; 801 802 PetscFunctionBegin; 803 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 804 805 /* make sure all processors are either in INSERTMODE or ADDMODE */ 806 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 807 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 808 mat->insertmode = addv; /* in case this processor had no cache */ 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 812 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 813 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 815 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 822 { 823 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 824 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 825 PetscErrorCode ierr; 826 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 827 PetscInt *row,*col; 828 PetscBool r1,r2,r3,other_disassembled; 829 MatScalar *val; 830 InsertMode addv = mat->insertmode; 831 PetscMPIInt n; 832 833 PetscFunctionBegin; 834 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 835 if (!baij->donotstash && !mat->nooffprocentries) { 836 while (1) { 837 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 838 if (!flg) break; 839 840 for (i=0; i<n;) { 841 /* Now identify the consecutive vals belonging to the same row */ 842 for (j=i,rstart=row[j]; j<n; j++) { 843 if (row[j] != rstart) break; 844 } 845 if (j < n) ncols = j-i; 846 else ncols = n-i; 847 /* Now assemble all these values with a single function call */ 848 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 849 i = j; 850 } 851 } 852 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 853 /* Now process the block-stash. 
Since the values are stashed column-oriented, 854 set the roworiented flag to column oriented, and after MatSetValues() 855 restore the original flags */ 856 r1 = baij->roworiented; 857 r2 = a->roworiented; 858 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 859 860 baij->roworiented = PETSC_FALSE; 861 a->roworiented = PETSC_FALSE; 862 863 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 864 while (1) { 865 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 866 if (!flg) break; 867 868 for (i=0; i<n;) { 869 /* Now identify the consecutive vals belonging to the same row */ 870 for (j=i,rstart=row[j]; j<n; j++) { 871 if (row[j] != rstart) break; 872 } 873 if (j < n) ncols = j-i; 874 else ncols = n-i; 875 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 880 881 baij->roworiented = r1; 882 a->roworiented = r2; 883 884 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 885 } 886 887 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 889 890 /* determine if any processor has disassembled, if so we must 891 also disassemble ourselfs, in order that we may reassemble. */ 892 /* 893 if nonzero structure of submatrix B cannot change then we know that 894 no processor disassembled thus we can skip this stuff 895 */ 896 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 897 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 898 if (mat->was_assembled && !other_disassembled) { 899 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 900 } 901 } 902 903 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 904 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 905 } 906 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 907 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 908 909 #if defined(PETSC_USE_INFO) 910 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 911 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 912 913 baij->ht_total_ct = 0; 914 baij->ht_insert_ct = 0; 915 } 916 #endif 917 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 918 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 919 920 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 921 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 922 } 923 924 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 925 926 baij->rowvalues = 0; 927 928 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 929 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 930 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 931 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 932 } 933 PetscFunctionReturn(0); 934 } 935 936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 937 #include <petscdraw.h> 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 941 { 942 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 943 PetscErrorCode ierr; 944 PetscMPIInt rank 
= baij->rank; 945 PetscInt bs = mat->rmap->bs; 946 PetscBool iascii,isdraw; 947 PetscViewer sviewer; 948 PetscViewerFormat format; 949 950 PetscFunctionBegin; 951 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 953 if (iascii) { 954 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 955 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 956 MatInfo info; 957 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 958 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 961 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 962 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 964 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 966 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 967 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 969 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } else if (format == PETSC_VIEWER_ASCII_INFO) { 972 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 975 PetscFunctionReturn(0); 976 } 977 } 978 979 if (isdraw) { 980 PetscDraw draw; 981 PetscBool isnull; 982 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 983 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 984 } 985 986 { 987 /* assemble the entire matrix onto first processor. */ 988 Mat A; 989 Mat_SeqBAIJ *Aloc; 990 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 991 MatScalar *a; 992 const char *matname; 993 994 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 995 /* Perhaps this should be the type of mat? 
*/ 996 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 997 if (!rank) { 998 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 999 } else { 1000 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1001 } 1002 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1003 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1004 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1005 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1006 1007 /* copy over the A part */ 1008 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1009 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1010 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1011 1012 for (i=0; i<mbs; i++) { 1013 rvals[0] = bs*(baij->rstartbs + i); 1014 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1015 for (j=ai[i]; j<ai[i+1]; j++) { 1016 col = (baij->cstartbs+aj[j])*bs; 1017 for (k=0; k<bs; k++) { 1018 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1019 col++; a += bs; 1020 } 1021 } 1022 } 1023 /* copy over the B part */ 1024 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1025 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1026 for (i=0; i<mbs; i++) { 1027 rvals[0] = bs*(baij->rstartbs + i); 1028 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1029 for (j=ai[i]; j<ai[i+1]; j++) { 1030 col = baij->garray[aj[j]]*bs; 1031 for (k=0; k<bs; k++) { 1032 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1033 col++; a += bs; 1034 } 1035 } 1036 } 1037 ierr = PetscFree(rvals);CHKERRQ(ierr); 1038 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 /* 1041 Everyone has to call to draw the matrix since the graphics waits are 1042 synchronized across all processors that share the PetscDraw object 1043 */ 1044 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1045 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1046 if (!rank) { 1047 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1048 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1049 } 1050 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1051 ierr = MatDestroy(&A);CHKERRQ(ierr); 1052 } 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1058 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1059 { 1060 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1061 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1062 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1063 PetscErrorCode ierr; 1064 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1065 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1066 int fd; 1067 PetscScalar *column_values; 1068 FILE *file; 1069 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1070 PetscInt message_count,flowcontrolcount; 1071 1072 PetscFunctionBegin; 1073 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1074 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1075 nz = bs2*(A->nz + B->nz); 1076 rlen = mat->rmap->n; 1077 if (!rank) { 1078 header[0] = MAT_FILE_CLASSID; 1079 header[1] = mat->rmap->N; 1080 header[2] = mat->cmap->N; 1081 1082 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1083 ierr = 
PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1084 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1085 /* get largest number of rows any processor has */ 1086 range = mat->rmap->range; 1087 for (i=1; i<size; i++) { 1088 rlen = PetscMax(rlen,range[i+1] - range[i]); 1089 } 1090 } else { 1091 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1092 } 1093 1094 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1095 /* compute lengths of each row */ 1096 for (i=0; i<a->mbs; i++) { 1097 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1098 } 1099 /* store the row lengths to the file */ 1100 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1101 if (!rank) { 1102 MPI_Status status; 1103 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1104 rlen = (range[1] - range[0])/bs; 1105 for (i=0; i<rlen; i++) { 1106 for (j=0; j<bs; j++) { 1107 row_lens[i*bs+j] = bs*crow_lens[i]; 1108 } 1109 } 1110 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1111 for (i=1; i<size; i++) { 1112 rlen = (range[i+1] - range[i])/bs; 1113 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1114 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1115 for (k=0; k<rlen; k++) { 1116 for (j=0; j<bs; j++) { 1117 row_lens[k*bs+j] = bs*crow_lens[k]; 1118 } 1119 } 1120 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1121 } 1122 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1123 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1124 } else { 1125 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1126 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1127 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1128 } 1129 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1130 1131 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1132 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1133 the communication needed for the nonzero values */ 1134 nzmax = nz; /* space a largest processor needs */ 1135 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1136 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1137 cnt = 0; 1138 for (i=0; i<a->mbs; i++) { 1139 pcnt = cnt; 1140 for (j=B->i[i]; j<B->i[i+1]; j++) { 1141 if ((col = garray[B->j[j]]) > cstart) break; 1142 for (l=0; l<bs; l++) { 1143 column_indices[cnt++] = bs*col+l; 1144 } 1145 } 1146 for (k=A->i[i]; k<A->i[i+1]; k++) { 1147 for (l=0; l<bs; l++) { 1148 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1149 } 1150 } 1151 for (; j<B->i[i+1]; j++) { 1152 for (l=0; l<bs; l++) { 1153 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1154 } 1155 } 1156 len = cnt - pcnt; 1157 for (k=1; k<bs; k++) { 1158 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1159 cnt += len; 1160 } 1161 } 1162 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1163 1164 /* store the columns to the file */ 1165 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1166 if (!rank) { 1167 MPI_Status status; 1168 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1169 for (i=1; i<size; i++) { 1170 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1171 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1172 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1173 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1176 } else { 1177 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1178 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1179 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1180 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1181 } 1182 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1183 1184 /* load up the numerical values */ 1185 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1186 cnt = 0; 1187 for (i=0; i<a->mbs; i++) { 1188 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1189 for (j=B->i[i]; j<B->i[i+1]; j++) { 1190 if (garray[B->j[j]] > cstart) break; 1191 for (l=0; l<bs; l++) { 1192 for (ll=0; ll<bs; ll++) { 1193 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1194 } 1195 } 1196 cnt += bs; 1197 } 1198 for (k=A->i[i]; k<A->i[i+1]; k++) { 1199 for (l=0; l<bs; l++) { 1200 for (ll=0; ll<bs; ll++) { 1201 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1202 } 1203 } 1204 cnt += bs; 1205 } 1206 for (; j<B->i[i+1]; j++) { 1207 for (l=0; l<bs; l++) { 1208 for (ll=0; ll<bs; ll++) { 1209 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1210 } 1211 } 1212 cnt += bs; 1213 } 1214 cnt += (bs-1)*rlen; 1215 } 1216 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1217 1218 /* store the column values to the file */ 1219 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1220 if (!rank) { 1221 MPI_Status status; 1222 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1223 for (i=1; i<size; i++) { 1224 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1225 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1226 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1227 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1228 } 1229 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1230 } else { 1231 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1232 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1233 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1235 } 1236 ierr = PetscFree(column_values);CHKERRQ(ierr); 1237 1238 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1239 if (file) { 1240 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1241 } 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatView_MPIBAIJ" 1247 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1248 { 1249 PetscErrorCode ierr; 1250 PetscBool iascii,isdraw,issocket,isbinary; 1251 1252 PetscFunctionBegin; 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1255 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1256 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1257 if (iascii || isdraw || issocket) { 1258 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1259 } else if (isbinary) { 1260 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1268 { 1269 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1270 PetscErrorCode ierr; 1271 1272 PetscFunctionBegin; 1273 #if defined(PETSC_USE_LOG) 1274 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1275 #endif 1276 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1277 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1278 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1279 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1280 #if defined(PETSC_USE_CTABLE) 1281 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1282 #else 1283 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1284 #endif 1285 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1286 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1287 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1288 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1289 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1290 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1291 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1292 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1293 1294 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1296 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatMult_MPIBAIJ" 1309 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1310 { 1311 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 PetscInt nt; 1314 1315 PetscFunctionBegin; 1316 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1317 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1318 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1319 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1320 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1322 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1323 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1329 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1330 { 1331 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1337 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1344 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1345 { 1346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1347 PetscErrorCode ierr; 1348 PetscBool merged; 1349 1350 PetscFunctionBegin; 1351 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1352 /* do nondiagonal part */ 1353 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1354 if (!merged) { 1355 /* send it on its way */ 1356 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1357 /* do local part */ 1358 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1359 /* receive remote parts: note this assumes the values are not actually */ 1360 /* inserted in yy until the next line */ 1361 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1362 } else { 1363 /* do local part */ 1364 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1365 /* send it on its way */ 1366 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 /* values actually were received in the Begin() 
but we need to call this nop */ 1368 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1375 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1376 { 1377 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1378 PetscErrorCode ierr; 1379 1380 PetscFunctionBegin; 1381 /* do nondiagonal part */ 1382 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1383 /* send it on its way */ 1384 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1385 /* do local part */ 1386 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1387 /* receive remote parts: note this assumes the values are not actually */ 1388 /* inserted in yy until the next line, which is true for my implementation*/ 1389 /* but is not perhaps always true. */ 1390 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 /* 1395 This only works correctly for square matrices where the subblock A->A is the 1396 diagonal block 1397 */ 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1400 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1401 { 1402 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1407 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatScale_MPIBAIJ" 1413 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1414 { 1415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1420 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1426 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1427 { 1428 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1429 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1430 PetscErrorCode ierr; 1431 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1432 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1433 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1434 1435 PetscFunctionBegin; 1436 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1437 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1438 mat->getrowactive = PETSC_TRUE; 1439 1440 if (!mat->rowvalues && (idx || v)) { 1441 /* 1442 allocate enough space to hold information from the longest row. 
1443 */ 1444 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1445 PetscInt max = 1,mbs = mat->mbs,tmp; 1446 for (i=0; i<mbs; i++) { 1447 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1448 if (max < tmp) max = tmp; 1449 } 1450 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1451 } 1452 lrow = row - brstart; 1453 1454 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1455 if (!v) {pvA = 0; pvB = 0;} 1456 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1457 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1458 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1459 nztot = nzA + nzB; 1460 1461 cmap = mat->garray; 1462 if (v || idx) { 1463 if (nztot) { 1464 /* Sort by increasing column numbers, assuming A and B already sorted */ 1465 PetscInt imark = -1; 1466 if (v) { 1467 *v = v_p = mat->rowvalues; 1468 for (i=0; i<nzB; i++) { 1469 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1470 else break; 1471 } 1472 imark = i; 1473 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1474 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1475 } 1476 if (idx) { 1477 *idx = idx_p = mat->rowindices; 1478 if (imark > -1) { 1479 for (i=0; i<imark; i++) { 1480 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1481 } 1482 } else { 1483 for (i=0; i<nzB; i++) { 1484 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1485 else break; 1486 } 1487 imark = i; 1488 } 1489 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1490 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1491 } 1492 } else { 1493 if (idx) *idx = 0; 1494 if (v) *v = 0; 1495 } 1496 } 1497 *nz = nztot; 1498 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1499 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1505 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1506 { 1507 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1508 1509 PetscFunctionBegin; 1510 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1511 baij->getrowactive = PETSC_FALSE; 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1517 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1518 { 1519 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1524 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1530 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1531 { 1532 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1533 Mat A = a->A,B = a->B; 1534 PetscErrorCode ierr; 1535 PetscReal isend[5],irecv[5]; 1536 1537 PetscFunctionBegin; 1538 info->block_size = (PetscReal)matin->rmap->bs; 1539 1540 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1541 1542 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1543 isend[3] = info->memory; isend[4] = info->mallocs; 1544 1545 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1546 1547 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1548 isend[3] += info->memory; 
isend[4] += info->mallocs; 1549 1550 if (flag == MAT_LOCAL) { 1551 info->nz_used = isend[0]; 1552 info->nz_allocated = isend[1]; 1553 info->nz_unneeded = isend[2]; 1554 info->memory = isend[3]; 1555 info->mallocs = isend[4]; 1556 } else if (flag == MAT_GLOBAL_MAX) { 1557 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1558 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else if (flag == MAT_GLOBAL_SUM) { 1565 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1566 1567 info->nz_used = irecv[0]; 1568 info->nz_allocated = irecv[1]; 1569 info->nz_unneeded = irecv[2]; 1570 info->memory = irecv[3]; 1571 info->mallocs = irecv[4]; 1572 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1573 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1574 info->fill_ratio_needed = 0; 1575 info->factor_mallocs = 0; 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1581 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1582 { 1583 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1584 PetscErrorCode ierr; 1585 1586 PetscFunctionBegin; 1587 switch (op) { 1588 case MAT_NEW_NONZERO_LOCATIONS: 1589 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1590 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1591 case MAT_KEEP_NONZERO_PATTERN: 1592 case MAT_NEW_NONZERO_LOCATION_ERR: 1593 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1594 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1595 break; 1596 case MAT_ROW_ORIENTED: 1597 a->roworiented = flg; 1598 1599 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1600 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1601 break; 1602 case MAT_NEW_DIAGONALS: 1603 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1604 break; 1605 case MAT_IGNORE_OFF_PROC_ENTRIES: 1606 a->donotstash = flg; 1607 break; 1608 case MAT_USE_HASH_TABLE: 1609 a->ht_flag = flg; 1610 break; 1611 case MAT_SYMMETRIC: 1612 case MAT_STRUCTURALLY_SYMMETRIC: 1613 case MAT_HERMITIAN: 1614 case MAT_SYMMETRY_ETERNAL: 1615 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1616 break; 1617 default: 1618 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1619 } 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1625 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1626 { 1627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1628 Mat_SeqBAIJ *Aloc; 1629 Mat B; 1630 PetscErrorCode ierr; 1631 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1632 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1633 MatScalar *a; 1634 1635 PetscFunctionBegin; 1636 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1637 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1638 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1639 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1640 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1641 /* Do not know preallocation information, but must set block size */ 1642 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1643 } else { 
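    /* MAT_REUSE_MATRIX with *matout != A: reuse the previously created transpose and overwrite its entries below */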
1644 B = *matout; 1645 } 1646 1647 /* copy over the A part */ 1648 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1649 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1650 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1651 1652 for (i=0; i<mbs; i++) { 1653 rvals[0] = bs*(baij->rstartbs + i); 1654 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1655 for (j=ai[i]; j<ai[i+1]; j++) { 1656 col = (baij->cstartbs+aj[j])*bs; 1657 for (k=0; k<bs; k++) { 1658 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1659 1660 col++; a += bs; 1661 } 1662 } 1663 } 1664 /* copy over the B part */ 1665 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1666 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1667 for (i=0; i<mbs; i++) { 1668 rvals[0] = bs*(baij->rstartbs + i); 1669 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1670 for (j=ai[i]; j<ai[i+1]; j++) { 1671 col = baij->garray[aj[j]]*bs; 1672 for (k=0; k<bs; k++) { 1673 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1674 col++; 1675 a += bs; 1676 } 1677 } 1678 } 1679 ierr = PetscFree(rvals);CHKERRQ(ierr); 1680 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 1683 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1684 else { 1685 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1692 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1693 { 1694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1695 Mat a = baij->A,b = baij->B; 1696 PetscErrorCode ierr; 1697 PetscInt s1,s2,s3; 1698 1699 PetscFunctionBegin; 1700 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1701 if (rr) { 1702 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1703 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1704 /* Overlap communication with computation. 
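The scatter of rr into baij->lvec is started here, the diagonal block is scaled while the messages are in flight, and the scatter is completed below just before the off-diagonal block is right (column) scaled with the ghosted values.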
*/ 1705 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 } 1707 if (ll) { 1708 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1709 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1710 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1711 } 1712 /* scale the diagonal block */ 1713 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1714 1715 if (rr) { 1716 /* Do a scatter end and then right scale the off-diagonal block */ 1717 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1718 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1725 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1726 { 1727 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1728 PetscInt *owners = A->rmap->range; 1729 PetscInt n = A->rmap->n; 1730 PetscSF sf; 1731 PetscInt *lrows; 1732 PetscSFNode *rrows; 1733 PetscInt r, p = 0, len = 0; 1734 PetscErrorCode ierr; 1735 1736 PetscFunctionBegin; 1737 /* Create SF where leaves are input rows and roots are owned rows */ 1738 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1739 for (r = 0; r < n; ++r) lrows[r] = -1; 1740 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1741 for (r = 0; r < N; ++r) { 1742 const PetscInt idx = rows[r]; 1743 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1744 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1745 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1746 } 1747 if (A->nooffproczerorows) { 1748 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1749 lrows[len++] = idx - owners[p]; 1750 } else { 1751 rrows[r].rank = p; 1752 rrows[r].index = rows[r] - owners[p]; 1753 } 1754 } 1755 if (!A->nooffproczerorows) { 1756 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1757 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1758 /* Collect flags for rows to be zeroed */ 1759 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1760 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1761 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1762 /* Compress and put in row numbers */ 1763 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1764 } 1765 /* fix right hand side if needed */ 1766 if (x && b) { 1767 const PetscScalar *xx; 1768 PetscScalar *bb; 1769 1770 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1771 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1772 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1773 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1774 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1775 } 1776 1777 /* actually zap the local rows */ 1778 /* 1779 Zero the required rows. If the "diagonal block" of the matrix 1780 is square and the user wishes to set the diagonal we use separate 1781 code so that MatSetValues() is not called for each diagonal allocating 1782 new memory, thus calling lots of mallocs and slowing things down. 
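When the diagonal block is rectangular the rows are instead zeroed without a diagonal value and the requested diagonal entries are inserted one at a time with MatSetValues(), which requires that new nonzero locations still be allowed in the matrix.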
1783 1784 */ 1785 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1786 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1787 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1788 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1789 } else if (diag != 0.0) { 1790 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1791 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1792 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1793 for (r = 0; r < len; ++r) { 1794 const PetscInt row = lrows[r] + A->rmap->rstart; 1795 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1796 } 1797 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1798 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1799 } else { 1800 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1801 } 1802 ierr = PetscFree(lrows);CHKERRQ(ierr); 1803 1804 /* only change matrix nonzero state if pattern was allowed to be changed */ 1805 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1806 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1807 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1808 } 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1814 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1815 { 1816 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1817 PetscErrorCode ierr; 1818 PetscMPIInt n = A->rmap->n; 1819 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1820 PetscInt *lrows,*owners = A->rmap->range; 1821 PetscSFNode *rrows; 1822 PetscSF sf; 1823 const PetscScalar *xx; 1824 PetscScalar *bb,*mask; 1825 Vec xmask,lmask; 1826 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1827 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1828 PetscScalar *aa; 1829 1830 PetscFunctionBegin; 1831 /* Create SF where leaves are input rows and roots are owned rows */ 1832 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1833 for (r = 0; r < n; ++r) lrows[r] = -1; 1834 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1835 for (r = 0; r < N; ++r) { 1836 const PetscInt idx = rows[r]; 1837 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1838 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1839 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1840 } 1841 rrows[r].rank = p; 1842 rrows[r].index = rows[r] - owners[p]; 1843 } 1844 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1845 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1846 /* Collect flags for rows to be zeroed */ 1847 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1848 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1849 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1850 /* Compress and put in row numbers */ 1851 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1852 /* zero diagonal part of matrix */ 1853 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1854 /* 
handle off diagonal part of matrix */ 1855 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1856 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1857 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1858 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1859 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1860 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1861 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1862 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1863 if (x) { 1864 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1867 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1868 } 1869 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1870 /* remove zeroed rows of off diagonal matrix */ 1871 for (i = 0; i < len; ++i) { 1872 row = lrows[i]; 1873 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1874 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1875 for (k = 0; k < count; ++k) { 1876 aa[0] = 0.0; 1877 aa += bs; 1878 } 1879 } 1880 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1881 for (i = 0; i < l->B->rmap->N; ++i) { 1882 row = i/bs; 1883 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1884 for (k = 0; k < bs; ++k) { 1885 col = bs*baij->j[j] + k; 1886 if (PetscAbsScalar(mask[col])) { 1887 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1888 if (b) bb[i] -= aa[0]*xx[col]; 1889 aa[0] = 0.0; 1890 } 1891 } 1892 } 1893 } 1894 if (x) { 1895 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1896 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1897 } 1898 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1899 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1900 ierr = PetscFree(lrows);CHKERRQ(ierr); 1901 1902 /* only change matrix nonzero state if pattern was allowed to be changed */ 1903 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1904 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1905 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1906 } 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1912 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1913 { 1914 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1915 PetscErrorCode ierr; 1916 1917 PetscFunctionBegin; 1918 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatEqual_MPIBAIJ" 1926 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1927 { 1928 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1929 Mat a,b,c,d; 1930 PetscBool flg; 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 a = matA->A; b = matA->B; 1935 c = matB->A; d = matB->B; 1936 1937 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1938 if (flg) { 1939 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1940 } 1941 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatCopy_MPIBAIJ" 1947 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1948 { 1949 PetscErrorCode ierr; 1950 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 
1951 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1952 1953 PetscFunctionBegin; 1954 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1955 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1956 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1957 } else { 1958 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1959 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1966 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1967 { 1968 PetscErrorCode ierr; 1969 1970 PetscFunctionBegin; 1971 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 1977 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1978 { 1979 PetscErrorCode ierr; 1980 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1981 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1982 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1983 1984 PetscFunctionBegin; 1985 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1991 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1992 { 1993 PetscErrorCode ierr; 1994 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1995 PetscBLASInt bnz,one=1; 1996 Mat_SeqBAIJ *x,*y; 1997 1998 PetscFunctionBegin; 1999 if (str == SAME_NONZERO_PATTERN) { 2000 PetscScalar alpha = a; 2001 x = (Mat_SeqBAIJ*)xx->A->data; 2002 y = (Mat_SeqBAIJ*)yy->A->data; 2003 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2004 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2005 x = (Mat_SeqBAIJ*)xx->B->data; 2006 y = (Mat_SeqBAIJ*)yy->B->data; 2007 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2008 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2009 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2010 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2011 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2012 } else { 2013 Mat B; 2014 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2015 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2016 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2017 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2018 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2019 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2020 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2021 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2022 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2023 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2024 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2025 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
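A fresh matrix B with exact preallocation (computed above from the union of the nonzero patterns of X and Y) is therefore filled and then swapped into Y with MatHeaderReplace().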
*/ 2026 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2027 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2028 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2029 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2030 } 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2036 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2037 { 2038 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2039 PetscErrorCode ierr; 2040 2041 PetscFunctionBegin; 2042 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2043 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2049 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2050 { 2051 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2052 PetscErrorCode ierr; 2053 2054 PetscFunctionBegin; 2055 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2056 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 #undef __FUNCT__ 2061 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2062 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2063 { 2064 PetscErrorCode ierr; 2065 IS iscol_local; 2066 PetscInt csize; 2067 2068 PetscFunctionBegin; 2069 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2070 if (call == MAT_REUSE_MATRIX) { 2071 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2072 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2073 } else { 2074 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2075 } 2076 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2077 if (call == MAT_INITIAL_MATRIX) { 2078 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2079 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2086 /* 2087 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2088 in local and then by concatenating the local matrices the end result. 2089 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ(). 2090 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2091 */ 2092 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2093 { 2094 PetscErrorCode ierr; 2095 PetscMPIInt rank,size; 2096 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2097 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2098 Mat M,Mreuse; 2099 MatScalar *vwork,*aa; 2100 MPI_Comm comm; 2101 IS isrow_new, iscol_new; 2102 PetscBool idflag,allrows, allcols; 2103 Mat_SeqBAIJ *aij; 2104 2105 PetscFunctionBegin; 2106 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2107 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2108 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2109 /* The compression and expansion should be avoided. 
Doesn't point 2110 out errors, might change the indices, hence buggey */ 2111 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2112 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2113 2114 /* Check for special case: each processor gets entire matrix columns */ 2115 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2116 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2117 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2118 else allcols = PETSC_FALSE; 2119 2120 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2121 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2122 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2123 else allrows = PETSC_FALSE; 2124 2125 if (call == MAT_REUSE_MATRIX) { 2126 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2127 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2128 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2129 } else { 2130 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2131 } 2132 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2133 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2134 /* 2135 m - number of local rows 2136 n - number of columns (same on all processors) 2137 rstart - first row in new global matrix generated 2138 */ 2139 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2140 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2141 m = m/bs; 2142 n = n/bs; 2143 2144 if (call == MAT_INITIAL_MATRIX) { 2145 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2146 ii = aij->i; 2147 jj = aij->j; 2148 2149 /* 2150 Determine the number of non-zeros in the diagonal and off-diagonal 2151 portions of the matrix in order to do correct preallocation 2152 */ 2153 2154 /* first get start and end of "diagonal" columns */ 2155 if (csize == PETSC_DECIDE) { 2156 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2157 if (mglobal == n*bs) { /* square matrix */ 2158 nlocal = m; 2159 } else { 2160 nlocal = n/size + ((n % size) > rank); 2161 } 2162 } else { 2163 nlocal = csize/bs; 2164 } 2165 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2166 rstart = rend - nlocal; 2167 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2168 2169 /* next, compute all the lengths */ 2170 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2171 for (i=0; i<m; i++) { 2172 jend = ii[i+1] - ii[i]; 2173 olen = 0; 2174 dlen = 0; 2175 for (j=0; j<jend; j++) { 2176 if (*jj < rstart || *jj >= rend) olen++; 2177 else dlen++; 2178 jj++; 2179 } 2180 olens[i] = olen; 2181 dlens[i] = dlen; 2182 } 2183 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2184 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2185 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2186 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2187 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2188 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2189 } else { 2190 PetscInt ml,nl; 2191 2192 M = *newmat; 2193 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2194 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 2195 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2196 /* 2197 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2198 rather than the slower MatSetValues(). 2199 */ 2200 M->was_assembled = PETSC_TRUE; 2201 M->assembled = PETSC_FALSE; 2202 } 2203 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2204 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2205 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2206 ii = aij->i; 2207 jj = aij->j; 2208 aa = aij->a; 2209 for (i=0; i<m; i++) { 2210 row = rstart/bs + i; 2211 nz = ii[i+1] - ii[i]; 2212 cwork = jj; jj += nz; 2213 vwork = aa; aa += nz*bs*bs; 2214 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2215 } 2216 2217 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2218 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2219 *newmat = M; 2220 2221 /* save submatrix used in processor for next request */ 2222 if (call == MAT_INITIAL_MATRIX) { 2223 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2224 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2225 } 2226 PetscFunctionReturn(0); 2227 } 2228 2229 #undef __FUNCT__ 2230 #define __FUNCT__ "MatPermute_MPIBAIJ" 2231 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2232 { 2233 MPI_Comm comm,pcomm; 2234 PetscInt clocal_size,nrows; 2235 const PetscInt *rows; 2236 PetscMPIInt size; 2237 IS crowp,lcolp; 2238 PetscErrorCode ierr; 2239 2240 PetscFunctionBegin; 2241 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2242 /* make a collective version of 'rowp' */ 2243 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2244 if (pcomm==comm) { 2245 crowp = rowp; 2246 } else { 2247 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2248 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2249 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2250 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2251 } 2252 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2253 /* make a local version of 'colp' */ 2254 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2255 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2256 if (size==1) { 2257 lcolp = colp; 2258 } else { 2259 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2260 } 2261 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2262 /* now we just get the submatrix */ 2263 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2264 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2265 /* clean up */ 2266 if (pcomm!=comm) { 2267 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2268 } 2269 if (size>1) { 2270 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2271 } 2272 PetscFunctionReturn(0); 2273 } 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2277 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2278 { 2279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2280 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2281 2282 PetscFunctionBegin; 2283 if (nghosts) *nghosts = B->nbs; 2284 if (ghosts) *ghosts = baij->garray; 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2290 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2291 { 2292 Mat B; 2293 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2294 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = 
(Mat_SeqBAIJ*)a->B->data; 2295 Mat_SeqAIJ *b; 2296 PetscErrorCode ierr; 2297 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2298 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2299 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2300 2301 PetscFunctionBegin; 2302 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2303 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2304 2305 /* ---------------------------------------------------------------- 2306 Tell every processor the number of nonzeros per row 2307 */ 2308 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2309 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2310 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2311 } 2312 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2313 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2314 displs = recvcounts + size; 2315 for (i=0; i<size; i++) { 2316 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2317 displs[i] = A->rmap->range[i]/bs; 2318 } 2319 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2320 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2321 #else 2322 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2323 #endif 2324 /* --------------------------------------------------------------- 2325 Create the sequential matrix of the same type as the local block diagonal 2326 */ 2327 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2328 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2329 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2330 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2331 b = (Mat_SeqAIJ*)B->data; 2332 2333 /*-------------------------------------------------------------------- 2334 Copy my part of matrix column indices over 2335 */ 2336 sendcount = ad->nz + bd->nz; 2337 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2338 a_jsendbuf = ad->j; 2339 b_jsendbuf = bd->j; 2340 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2341 cnt = 0; 2342 for (i=0; i<n; i++) { 2343 2344 /* put in lower diagonal portion */ 2345 m = bd->i[i+1] - bd->i[i]; 2346 while (m > 0) { 2347 /* is it above diagonal (in bd (compressed) numbering) */ 2348 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2349 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2350 m--; 2351 } 2352 2353 /* put in diagonal portion */ 2354 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2355 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2356 } 2357 2358 /* put in upper diagonal portion */ 2359 while (m-- > 0) { 2360 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2361 } 2362 } 2363 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2364 2365 /*-------------------------------------------------------------------- 2366 Gather all column indices to all processors 2367 */ 2368 for (i=0; i<size; i++) { 2369 recvcounts[i] = 0; 2370 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2371 recvcounts[i] += lens[j]; 2372 } 2373 } 2374 displs[0] = 0; 2375 for (i=1; i<size; i++) { 2376 displs[i] = displs[i-1] + recvcounts[i-1]; 2377 } 2378 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2379 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2380 #else 2381 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2382 #endif 2383 /*-------------------------------------------------------------------- 2384 Assemble the matrix into useable form (note numerical values not yet set) 2385 */ 2386 /* set the b->ilen (length of each row) values */ 2387 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2388 /* set the b->i indices */ 2389 b->i[0] = 0; 2390 for (i=1; i<=A->rmap->N/bs; i++) { 2391 b->i[i] = b->i[i-1] + lens[i-1]; 2392 } 2393 ierr = PetscFree(lens);CHKERRQ(ierr); 2394 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2395 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2396 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2397 2398 if (A->symmetric) { 2399 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2400 } else if (A->hermitian) { 2401 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2402 } else if (A->structurally_symmetric) { 2403 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2404 } 2405 *newmat = B; 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatSOR_MPIBAIJ" 2411 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2412 { 2413 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2414 PetscErrorCode ierr; 2415 Vec bb1 = 0; 2416 2417 PetscFunctionBegin; 2418 if (flag == SOR_APPLY_UPPER) { 2419 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2424 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2425 } 2426 2427 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2428 if (flag & SOR_ZERO_INITIAL_GUESS) { 2429 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2430 its--; 2431 } 2432 2433 while (its--) { 2434 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2435 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2436 2437 /* update rhs: bb1 = bb - B*x */ 2438 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2439 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2440 2441 /* local sweep */ 2442 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2443 } 2444 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2445 if (flag & SOR_ZERO_INITIAL_GUESS) { 2446 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2447 its--; 2448 } 2449 while (its--) { 2450 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2451 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2452 2453 /* update rhs: bb1 = bb - B*x */ 2454 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2455 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2456 2457 /* local sweep */ 2458 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2459 } 2460 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2461 if (flag & SOR_ZERO_INITIAL_GUESS) { 2462 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2463 its--; 2464 } 2465 while (its--) { 2466 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2467 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2468 2469 /* update rhs: bb1 = bb - B*x */ 2470 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2471 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2472 2473 /* local sweep */ 2474 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2475 } 2476 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2477 2478 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2484 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2485 { 2486 PetscErrorCode ierr; 2487 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2488 PetscInt N,i,*garray = aij->garray; 2489 PetscInt ib,jb,bs = A->rmap->bs; 2490 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2491 MatScalar *a_val = a_aij->a; 2492 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2493 MatScalar *b_val = b_aij->a; 2494 PetscReal *work; 2495 2496 PetscFunctionBegin; 2497 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2498 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2499 if (type == NORM_2) { 2500 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2501 for (jb=0; jb<bs; jb++) { 2502 for (ib=0; ib<bs; ib++) { 2503 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2504 a_val++; 2505 } 2506 } 2507 } 2508 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2509 for (jb=0; jb<bs; jb++) { 2510 for (ib=0; ib<bs; ib++) { 2511 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2512 b_val++; 2513 } 2514 } 2515 } 2516 } else if (type == NORM_1) { 2517 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2518 for (jb=0; jb<bs; jb++) { 2519 for (ib=0; ib<bs; ib++) { 2520 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2521 a_val++; 2522 } 2523 } 2524 } 2525 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2526 for (jb=0; jb<bs; jb++) { 2527 for (ib=0; ib<bs; ib++) { 2528 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2529 b_val++; 2530 } 2531 } 2532 } 2533 } else if (type == NORM_INFINITY) { 2534 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2535 for (jb=0; jb<bs; jb++) { 2536 for (ib=0; ib<bs; ib++) { 2537 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2538 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2539 a_val++; 2540 } 2541 } 2542 } 2543 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2544 for (jb=0; jb<bs; jb++) { 2545 for (ib=0; ib<bs; ib++) { 2546 int col = garray[b_aij->j[i]] * bs + jb; 2547 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2548 b_val++; 2549 } 2550 } 2551 } 2552 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2553 if (type == NORM_INFINITY) { 2554 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2555 } else { 2556 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2557 } 2558 ierr = PetscFree(work);CHKERRQ(ierr); 2559 if (type == NORM_2) { 2560 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2567 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2568 { 2569 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2570 PetscErrorCode ierr; 2571 2572 PetscFunctionBegin; 2573 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 2578 /* -------------------------------------------------------------------*/ 2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2580 MatGetRow_MPIBAIJ, 2581 MatRestoreRow_MPIBAIJ, 2582 MatMult_MPIBAIJ, 2583 /* 4*/ MatMultAdd_MPIBAIJ, 2584 MatMultTranspose_MPIBAIJ, 2585 MatMultTransposeAdd_MPIBAIJ, 2586 0, 2587 0, 2588 0, 2589 /*10*/ 0, 2590 0, 2591 0, 2592 MatSOR_MPIBAIJ, 2593 MatTranspose_MPIBAIJ, 2594 /*15*/ MatGetInfo_MPIBAIJ, 2595 MatEqual_MPIBAIJ, 2596 MatGetDiagonal_MPIBAIJ, 2597 MatDiagonalScale_MPIBAIJ, 2598 MatNorm_MPIBAIJ, 2599 /*20*/ MatAssemblyBegin_MPIBAIJ, 2600 MatAssemblyEnd_MPIBAIJ, 2601 MatSetOption_MPIBAIJ, 2602 MatZeroEntries_MPIBAIJ, 2603 /*24*/ MatZeroRows_MPIBAIJ, 2604 0, 2605 0, 2606 0, 2607 0, 2608 /*29*/ MatSetUp_MPIBAIJ, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*34*/ MatDuplicate_MPIBAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*39*/ MatAXPY_MPIBAIJ, 2619 MatGetSubMatrices_MPIBAIJ, 2620 MatIncreaseOverlap_MPIBAIJ, 2621 MatGetValues_MPIBAIJ, 2622 MatCopy_MPIBAIJ, 2623 /*44*/ 0, 2624 MatScale_MPIBAIJ, 2625 0, 2626 0, 2627 MatZeroRowsColumns_MPIBAIJ, 2628 /*49*/ 0, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*54*/ MatFDColoringCreate_MPIXAIJ, 2634 0, 2635 MatSetUnfactored_MPIBAIJ, 2636 MatPermute_MPIBAIJ, 2637 MatSetValuesBlocked_MPIBAIJ, 2638 /*59*/ MatGetSubMatrix_MPIBAIJ, 2639 MatDestroy_MPIBAIJ, 2640 MatView_MPIBAIJ, 2641 0, 2642 0, 2643 /*64*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2649 0, 2650 0, 2651 0, 2652 0, 2653 /*74*/ 0, 2654 MatFDColoringApply_BAIJ, 2655 0, 2656 0, 2657 0, 2658 /*79*/ 0, 2659 0, 2660 0, 2661 0, 2662 MatLoad_MPIBAIJ, 2663 /*84*/ 0, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /*89*/ 0, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*94*/ 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*99*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /*104*/0, 2684 MatRealPart_MPIBAIJ, 2685 MatImaginaryPart_MPIBAIJ, 2686 0, 2687 0, 2688 /*109*/0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2694 0, 2695 MatGetGhosts_MPIBAIJ, 2696 0, 2697 0, 2698 /*119*/0, 2699 0, 2700 0, 2701 0, 2702 MatGetMultiProcBlock_MPIBAIJ, 2703 /*124*/0, 2704 MatGetColumnNorms_MPIBAIJ, 2705 MatInvertBlockDiagonal_MPIBAIJ, 2706 0, 2707 0, 2708 /*129*/ 0, 2709 0, 2710 0, 2711 0, 2712 0, 2713 /*134*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*139*/ 0, 2719 0, 2720 0, 2721 MatFDColoringSetUp_MPIXAIJ, 2722 0, 2723 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2724 }; 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2728 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2729 { 2730 PetscFunctionBegin; 2731 *a = ((Mat_MPIBAIJ*)A->data)->A; 2732 PetscFunctionReturn(0); 2733 } 2734 2735 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2739 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2740 { 2741 PetscInt m,rstart,cstart,cend; 2742 PetscInt 
i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2743 const PetscInt *JJ =0; 2744 PetscScalar *values=0; 2745 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2746 PetscErrorCode ierr; 2747 2748 PetscFunctionBegin; 2749 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2750 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2751 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2752 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2753 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2754 m = B->rmap->n/bs; 2755 rstart = B->rmap->rstart/bs; 2756 cstart = B->cmap->rstart/bs; 2757 cend = B->cmap->rend/bs; 2758 2759 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2760 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2761 for (i=0; i<m; i++) { 2762 nz = ii[i+1] - ii[i]; 2763 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2764 nz_max = PetscMax(nz_max,nz); 2765 JJ = jj + ii[i]; 2766 for (j=0; j<nz; j++) { 2767 if (*JJ >= cstart) break; 2768 JJ++; 2769 } 2770 d = 0; 2771 for (; j<nz; j++) { 2772 if (*JJ++ >= cend) break; 2773 d++; 2774 } 2775 d_nnz[i] = d; 2776 o_nnz[i] = nz - d; 2777 } 2778 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2779 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2780 2781 values = (PetscScalar*)V; 2782 if (!values) { 2783 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2784 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2785 } 2786 for (i=0; i<m; i++) { 2787 PetscInt row = i + rstart; 2788 PetscInt ncols = ii[i+1] - ii[i]; 2789 const PetscInt *icols = jj + ii[i]; 2790 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2791 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2792 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2793 } else { /* block ordering does not match so we can only insert one block at a time. */ 2794 PetscInt j; 2795 for (j=0; j<ncols; j++) { 2796 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2797 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2798 } 2799 } 2800 } 2801 2802 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2803 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2804 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2805 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 #undef __FUNCT__ 2810 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2811 /*@C 2812 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2813 (the default parallel PETSc format). 2814 2815 Collective on MPI_Comm 2816 2817 Input Parameters: 2818 + B - the matrix 2819 . bs - the block size 2820 . i - the indices into j for the start of each local row (starts with zero) 2821 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2822 - v - optional values in the matrix 2823 2824 Level: developer 2825 2826 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 2827 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2828 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2829 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2830 block column and the second index is over columns within a block. 2831 2832 .keywords: matrix, aij, compressed row, sparse, parallel 2833 2834 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2835 @*/ 2836 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2837 { 2838 PetscErrorCode ierr; 2839 2840 PetscFunctionBegin; 2841 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2842 PetscValidType(B,1); 2843 PetscValidLogicalCollectiveInt(B,bs,2); 2844 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 #undef __FUNCT__ 2849 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2850 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2851 { 2852 Mat_MPIBAIJ *b; 2853 PetscErrorCode ierr; 2854 PetscInt i; 2855 2856 PetscFunctionBegin; 2857 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2858 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2859 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2860 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2861 2862 if (d_nnz) { 2863 for (i=0; i<B->rmap->n/bs; i++) { 2864 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2865 } 2866 } 2867 if (o_nnz) { 2868 for (i=0; i<B->rmap->n/bs; i++) { 2869 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2870 } 2871 } 2872 2873 b = (Mat_MPIBAIJ*)B->data; 2874 b->bs2 = bs*bs; 2875 b->mbs = B->rmap->n/bs; 2876 b->nbs = B->cmap->n/bs; 2877 b->Mbs = B->rmap->N/bs; 2878 b->Nbs = B->cmap->N/bs; 2879 2880 for (i=0; i<=b->size; i++) { 2881 b->rangebs[i] = B->rmap->range[i]/bs; 2882 } 2883 b->rstartbs = B->rmap->rstart/bs; 2884 b->rendbs = B->rmap->rend/bs; 2885 b->cstartbs = B->cmap->rstart/bs; 2886 b->cendbs = B->cmap->rend/bs; 2887 2888 if (!B->preallocated) { 2889 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2890 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2891 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2892 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2893 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2894 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2895 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2896 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2897 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2898 } 2899 2900 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2901 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2902 B->preallocated = PETSC_TRUE; 2903 PetscFunctionReturn(0); 2904 } 2905 2906 extern 
PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2907 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2908 2909 #undef __FUNCT__ 2910 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2911 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2912 { 2913 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2914 PetscErrorCode ierr; 2915 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2916 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2917 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2918 2919 PetscFunctionBegin; 2920 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2921 ii[0] = 0; 2922 for (i=0; i<M; i++) { 2923 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2924 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2925 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2926 /* remove one from count of matrix has diagonal */ 2927 for (j=id[i]; j<id[i+1]; j++) { 2928 if (jd[j] == i) {ii[i+1]--;break;} 2929 } 2930 } 2931 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2932 cnt = 0; 2933 for (i=0; i<M; i++) { 2934 for (j=io[i]; j<io[i+1]; j++) { 2935 if (garray[jo[j]] > rstart) break; 2936 jj[cnt++] = garray[jo[j]]; 2937 } 2938 for (k=id[i]; k<id[i+1]; k++) { 2939 if (jd[k] != i) { 2940 jj[cnt++] = rstart + jd[k]; 2941 } 2942 } 2943 for (; j<io[i+1]; j++) { 2944 jj[cnt++] = garray[jo[j]]; 2945 } 2946 } 2947 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2948 PetscFunctionReturn(0); 2949 } 2950 2951 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2952 2953 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2954 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2957 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2958 { 2959 PetscErrorCode ierr; 2960 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2961 Mat B; 2962 Mat_MPIAIJ *b; 2963 2964 PetscFunctionBegin; 2965 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2966 2967 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2968 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2969 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2970 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2971 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2972 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2973 b = (Mat_MPIAIJ*) B->data; 2974 2975 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2976 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2977 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2978 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2979 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2980 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 if (reuse == MAT_REUSE_MATRIX) { 2985 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2986 } else { 2987 *newmat = B; 2988 } 2989 PetscFunctionReturn(0); 2990 } 
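/*
   Example use of the conversion implemented above (a minimal sketch; A is assumed
   to be an assembled MATMPIBAIJ matrix created elsewhere):

      Mat Aaij;

      ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij);CHKERRQ(ierr);
      ierr = MatDestroy(&Aaij);CHKERRQ(ierr);

   In the MAT_REUSE_MATRIX branch above the header of A is replaced, so the
   conversion happens in place and A itself becomes a MATMPIAIJ matrix.
*/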
2991 2992 /*MC 2993 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2994 2995 Options Database Keys: 2996 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2997 . -mat_block_size <bs> - set the blocksize used to store the matrix 2998 - -mat_use_hash_table <fact> 2999 3000 Level: beginner 3001 3002 .seealso: MatCreateMPIBAIJ 3003 M*/ 3004 3005 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3006 3007 #undef __FUNCT__ 3008 #define __FUNCT__ "MatCreate_MPIBAIJ" 3009 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3010 { 3011 Mat_MPIBAIJ *b; 3012 PetscErrorCode ierr; 3013 PetscBool flg = PETSC_FALSE; 3014 3015 PetscFunctionBegin; 3016 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3017 B->data = (void*)b; 3018 3019 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3020 B->assembled = PETSC_FALSE; 3021 3022 B->insertmode = NOT_SET_VALUES; 3023 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3024 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3025 3026 /* build local table of row and column ownerships */ 3027 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3028 3029 /* build cache for off array entries formed */ 3030 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3031 3032 b->donotstash = PETSC_FALSE; 3033 b->colmap = NULL; 3034 b->garray = NULL; 3035 b->roworiented = PETSC_TRUE; 3036 3037 /* stuff used in block assembly */ 3038 b->barray = 0; 3039 3040 /* stuff used for matrix vector multiply */ 3041 b->lvec = 0; 3042 b->Mvctx = 0; 3043 3044 /* stuff for MatGetRow() */ 3045 b->rowindices = 0; 3046 b->rowvalues = 0; 3047 b->getrowactive = PETSC_FALSE; 3048 3049 /* hash table stuff */ 3050 b->ht = 0; 3051 b->hd = 0; 3052 b->ht_size = 0; 3053 b->ht_flag = PETSC_FALSE; 3054 b->ht_fact = 0; 3055 b->ht_total_ct = 0; 3056 b->ht_insert_ct = 0; 3057 3058 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3059 b->ijonly = PETSC_FALSE; 3060 3061 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3069 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3072 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3073 ierr = 
PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3074 3075 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3076 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 3077 if (flg) { 3078 PetscReal fact = 1.39; 3079 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3080 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3081 if (fact <= 1.0) fact = 1.39; 3082 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3083 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3084 } 3085 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3086 PetscFunctionReturn(0); 3087 } 3088 3089 /*MC 3090 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3091 3092 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3093 and MATMPIBAIJ otherwise. 3094 3095 Options Database Keys: 3096 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3097 3098 Level: beginner 3099 3100 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3101 M*/ 3102 3103 #undef __FUNCT__ 3104 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3105 /*@C 3106 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3107 (block compressed row). For good matrix assembly performance 3108 the user should preallocate the matrix storage by setting the parameters 3109 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3110 performance can be increased by more than a factor of 50. 3111 3112 Collective on Mat 3113 3114 Input Parameters: 3115 + B - the matrix 3116 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3117 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3118 . d_nz - number of block nonzeros per block row in diagonal portion of local 3119 submatrix (same for all local rows) 3120 . d_nnz - array containing the number of block nonzeros in the various block rows 3121 of the in diagonal portion of the local (possibly different for each block 3122 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3123 set it even if it is zero. 3124 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3125 submatrix (same for all local rows). 3126 - o_nnz - array containing the number of nonzeros in the various block rows of the 3127 off-diagonal portion of the local submatrix (possibly different for 3128 each block row) or NULL. 3129 3130 If the *_nnz parameter is given then the *_nz parameter is ignored 3131 3132 Options Database Keys: 3133 + -mat_block_size - size of the blocks to use 3134 - -mat_use_hash_table <fact> 3135 3136 Notes: 3137 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3138 than it must be used on all processors that share the object for that argument. 
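   Example of usage (an editorial sketch added here, not taken from the original manual page; comm,
   m, n, M, N and the subsequent assembly loop are assumed to be provided by the caller):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,NULL,2,NULL);CHKERRQ(ierr);
.ve

   This requests 2x2 blocks with room for 5 blocks per block row in the diagonal portion and 2 in the
   off-diagonal portion; it would be followed by the usual MatSetValuesBlocked()/MatSetValues() calls
   and MatAssemblyBegin()/MatAssemblyEnd().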
3139 3140 Storage Information: 3141 For a square global matrix we define each processor's diagonal portion 3142 to be its local rows and the corresponding columns (a square submatrix); 3143 each processor's off-diagonal portion encompasses the remainder of the 3144 local matrix (a rectangular submatrix). 3145 3146 The user can specify preallocated storage for the diagonal part of 3147 the local submatrix with either d_nz or d_nnz (not both). Set 3148 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3149 memory allocation. Likewise, specify preallocated storage for the 3150 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3151 3152 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3153 the figure below we depict these three local rows and all columns (0-11). 3154 3155 .vb 3156 0 1 2 3 4 5 6 7 8 9 10 11 3157 -------------------------- 3158 row 3 |o o o d d d o o o o o o 3159 row 4 |o o o d d d o o o o o o 3160 row 5 |o o o d d d o o o o o o 3161 -------------------------- 3162 .ve 3163 3164 Thus, any entries in the d locations are stored in the d (diagonal) 3165 submatrix, and any entries in the o locations are stored in the 3166 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3167 stored simply in the MATSEQBAIJ format for compressed row storage. 3168 3169 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3170 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3171 In general, for PDE problems in which most nonzeros are near the diagonal, 3172 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3173 or you will get TERRIBLE performance; see the users' manual chapter on 3174 matrices. 3175 3176 You can call MatGetInfo() to get information on how effective the preallocation was; 3177 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3178 You can also run with the option -info and look for messages with the string 3179 malloc in them to see if additional memory allocation was needed. 3180 3181 Level: intermediate 3182 3183 .keywords: matrix, block, aij, compressed row, sparse, parallel 3184 3185 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3186 @*/ 3187 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3188 { 3189 PetscErrorCode ierr; 3190 3191 PetscFunctionBegin; 3192 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3193 PetscValidType(B,1); 3194 PetscValidLogicalCollectiveInt(B,bs,2); 3195 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3196 PetscFunctionReturn(0); 3197 } 3198 3199 #undef __FUNCT__ 3200 #define __FUNCT__ "MatCreateBAIJ" 3201 /*@C 3202 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3203 (block compressed row). For good matrix assembly performance 3204 the user should preallocate the matrix storage by setting the parameters 3205 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3206 performance can be increased by more than a factor of 50. 3207 3208 Collective on MPI_Comm 3209 3210 Input Parameters: 3211 + comm - MPI communicator 3212 . bs - size of block, the blocks are ALWAYS square. 
One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3213 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3214 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3215 This value should be the same as the local size used in creating the 3216 y vector for the matrix-vector product y = Ax. 3217 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3218 This value should be the same as the local size used in creating the 3219 x vector for the matrix-vector product y = Ax. 3220 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3221 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3222 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3223 submatrix (same for all local rows) 3224 . d_nnz - array containing the number of nonzero blocks in the various block rows 3225 of the diagonal portion of the local submatrix (possibly different for each block 3226 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3227 and set it even if it is zero. 3228 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3229 submatrix (same for all local rows). 3230 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3231 off-diagonal portion of the local submatrix (possibly different for 3232 each block row) or NULL. 3233 3234 Output Parameter: 3235 . A - the matrix 3236 3237 Options Database Keys: 3238 + -mat_block_size - size of the blocks to use 3239 - -mat_use_hash_table <fact> 3240 3241 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3242 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3243 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3244 3245 Notes: 3246 If the *_nnz parameter is given then the *_nz parameter is ignored 3247 3248 A nonzero block is any block that has 1 or more nonzeros in it 3249 3250 The user MUST specify either the local or global matrix dimensions 3251 (possibly both). 3252 3253 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3254 then it must be used on all processors that share the object for that argument. 3255 3256 Storage Information: 3257 For a square global matrix we define each processor's diagonal portion 3258 to be its local rows and the corresponding columns (a square submatrix); 3259 each processor's off-diagonal portion encompasses the remainder of the 3260 local matrix (a rectangular submatrix). 3261 3262 The user can specify preallocated storage for the diagonal part of 3263 the local submatrix with either d_nz or d_nnz (not both). Set 3264 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3265 memory allocation. Likewise, specify preallocated storage for the 3266 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3267 3268 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3269 the figure below we depict these three local rows and all columns (0-11).
3270 3271 .vb 3272 0 1 2 3 4 5 6 7 8 9 10 11 3273 -------------------------- 3274 row 3 |o o o d d d o o o o o o 3275 row 4 |o o o d d d o o o o o o 3276 row 5 |o o o d d d o o o o o o 3277 -------------------------- 3278 .ve 3279 3280 Thus, any entries in the d locations are stored in the d (diagonal) 3281 submatrix, and any entries in the o locations are stored in the 3282 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3283 stored simply in the MATSEQBAIJ format for compressed row storage. 3284 3285 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3286 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3287 In general, for PDE problems in which most nonzeros are near the diagonal, 3288 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3289 or you will get TERRIBLE performance; see the users' manual chapter on 3290 matrices. 3291 3292 Level: intermediate 3293 3294 .keywords: matrix, block, aij, compressed row, sparse, parallel 3295 3296 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3297 @*/ 3298 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3299 { 3300 PetscErrorCode ierr; 3301 PetscMPIInt size; 3302 3303 PetscFunctionBegin; 3304 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3305 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3306 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3307 if (size > 1) { 3308 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3309 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3310 } else { 3311 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3312 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3313 } 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3319 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3320 { 3321 Mat mat; 3322 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3323 PetscErrorCode ierr; 3324 PetscInt len=0; 3325 3326 PetscFunctionBegin; 3327 *newmat = 0; 3328 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3329 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3330 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3331 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3332 3333 mat->factortype = matin->factortype; 3334 mat->preallocated = PETSC_TRUE; 3335 mat->assembled = PETSC_TRUE; 3336 mat->insertmode = NOT_SET_VALUES; 3337 3338 a = (Mat_MPIBAIJ*)mat->data; 3339 mat->rmap->bs = matin->rmap->bs; 3340 a->bs2 = oldmat->bs2; 3341 a->mbs = oldmat->mbs; 3342 a->nbs = oldmat->nbs; 3343 a->Mbs = oldmat->Mbs; 3344 a->Nbs = oldmat->Nbs; 3345 3346 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3347 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3348 3349 a->size = oldmat->size; 3350 a->rank = oldmat->rank; 3351 a->donotstash = oldmat->donotstash; 3352 a->roworiented = oldmat->roworiented; 3353 a->rowindices = 0; 3354 a->rowvalues = 0; 3355 a->getrowactive = PETSC_FALSE; 3356 a->barray = 0; 3357 a->rstartbs = oldmat->rstartbs; 3358 a->rendbs = oldmat->rendbs; 3359 a->cstartbs = oldmat->cstartbs; 3360 a->cendbs = 
oldmat->cendbs; 3361 3362 /* hash table stuff */ 3363 a->ht = 0; 3364 a->hd = 0; 3365 a->ht_size = 0; 3366 a->ht_flag = oldmat->ht_flag; 3367 a->ht_fact = oldmat->ht_fact; 3368 a->ht_total_ct = 0; 3369 a->ht_insert_ct = 0; 3370 3371 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3372 if (oldmat->colmap) { 3373 #if defined(PETSC_USE_CTABLE) 3374 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3375 #else 3376 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3377 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3378 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3379 #endif 3380 } else a->colmap = 0; 3381 3382 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3383 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3384 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3385 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3386 } else a->garray = 0; 3387 3388 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3389 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3390 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3391 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3392 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3393 3394 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3395 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3396 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3397 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3398 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3399 *newmat = mat; 3400 PetscFunctionReturn(0); 3401 } 3402 3403 #undef __FUNCT__ 3404 #define __FUNCT__ "MatLoad_MPIBAIJ" 3405 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3406 { 3407 PetscErrorCode ierr; 3408 int fd; 3409 PetscInt i,nz,j,rstart,rend; 3410 PetscScalar *vals,*buf; 3411 MPI_Comm comm; 3412 MPI_Status status; 3413 PetscMPIInt rank,size,maxnz; 3414 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3415 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3416 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3417 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3418 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3419 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3420 3421 PetscFunctionBegin; 3422 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3423 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3424 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3425 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3426 if (bs < 0) bs = 1; 3427 3428 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3429 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3430 if (!rank) { 3431 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3432 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3433 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3434 } 3435 3436 if 
(newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3437 3438 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3439 M = header[1]; N = header[2]; 3440 3441 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3442 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3443 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3444 3445 /* If global sizes are set, check if they are consistent with that given in the file */ 3446 if (sizesset) { 3447 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3448 } 3449 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3450 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3451 3452 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3453 3454 /* 3455 This code adds extra rows to make sure the number of rows is 3456 divisible by the blocksize 3457 */ 3458 Mbs = M/bs; 3459 extra_rows = bs - M + bs*Mbs; 3460 if (extra_rows == bs) extra_rows = 0; 3461 else Mbs++; 3462 if (extra_rows && !rank) { 3463 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3464 } 3465 3466 /* determine ownership of all rows */ 3467 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3468 mbs = Mbs/size + ((Mbs % size) > rank); 3469 m = mbs*bs; 3470 } else { /* User set */ 3471 m = newmat->rmap->n; 3472 mbs = m/bs; 3473 } 3474 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3475 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3476 3477 /* process 0 needs enough room for process with most rows */ 3478 if (!rank) { 3479 mmax = rowners[1]; 3480 for (i=2; i<=size; i++) { 3481 mmax = PetscMax(mmax,rowners[i]); 3482 } 3483 mmax*=bs; 3484 } else mmax = -1; /* unused, but compiler warns anyway */ 3485 3486 rowners[0] = 0; 3487 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3488 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3489 rstart = rowners[rank]; 3490 rend = rowners[rank+1]; 3491 3492 /* distribute row lengths to all processors */ 3493 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3494 if (!rank) { 3495 mend = m; 3496 if (size == 1) mend = mend - extra_rows; 3497 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3498 for (j=mend; j<m; j++) locrowlens[j] = 1; 3499 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3500 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3501 for (j=0; j<m; j++) { 3502 procsnz[0] += locrowlens[j]; 3503 } 3504 for (i=1; i<size; i++) { 3505 mend = browners[i+1] - browners[i]; 3506 if (i == size-1) mend = mend - extra_rows; 3507 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3508 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3509 /* calculate the number of nonzeros on each processor */ 3510 for (j=0; j<browners[i+1]-browners[i]; j++) { 3511 procsnz[i] += rowlengths[j]; 3512 } 3513 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3514 } 3515 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3516 } else { 3517 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3518 } 3519 3520 if (!rank) { 3521 /* determine max buffer needed and 
allocate it */ 3522 maxnz = procsnz[0]; 3523 for (i=1; i<size; i++) { 3524 maxnz = PetscMax(maxnz,procsnz[i]); 3525 } 3526 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3527 3528 /* read in my part of the matrix column indices */ 3529 nz = procsnz[0]; 3530 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3531 mycols = ibuf; 3532 if (size == 1) nz -= extra_rows; 3533 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3534 if (size == 1) { 3535 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3536 } 3537 3538 /* read in every ones (except the last) and ship off */ 3539 for (i=1; i<size-1; i++) { 3540 nz = procsnz[i]; 3541 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3542 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3543 } 3544 /* read in the stuff for the last proc */ 3545 if (size != 1) { 3546 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3547 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3548 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3549 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3550 } 3551 ierr = PetscFree(cols);CHKERRQ(ierr); 3552 } else { 3553 /* determine buffer space needed for message */ 3554 nz = 0; 3555 for (i=0; i<m; i++) { 3556 nz += locrowlens[i]; 3557 } 3558 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3559 mycols = ibuf; 3560 /* receive message of column indices*/ 3561 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3562 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3563 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3564 } 3565 3566 /* loop over local rows, determining number of off diagonal entries */ 3567 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3568 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3569 rowcount = 0; nzcount = 0; 3570 for (i=0; i<mbs; i++) { 3571 dcount = 0; 3572 odcount = 0; 3573 for (j=0; j<bs; j++) { 3574 kmax = locrowlens[rowcount]; 3575 for (k=0; k<kmax; k++) { 3576 tmp = mycols[nzcount++]/bs; 3577 if (!mask[tmp]) { 3578 mask[tmp] = 1; 3579 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3580 else masked1[dcount++] = tmp; 3581 } 3582 } 3583 rowcount++; 3584 } 3585 3586 dlens[i] = dcount; 3587 odlens[i] = odcount; 3588 3589 /* zero out the mask elements we set */ 3590 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3591 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3592 } 3593 3594 3595 if (!sizesset) { 3596 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3597 } 3598 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3599 3600 if (!rank) { 3601 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3602 /* read in my part of the matrix numerical values */ 3603 nz = procsnz[0]; 3604 vals = buf; 3605 mycols = ibuf; 3606 if (size == 1) nz -= extra_rows; 3607 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3608 if (size == 1) { 3609 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3610 } 3611 3612 /* insert into matrix */ 3613 jj = rstart*bs; 3614 for (i=0; i<m; i++) { 3615 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3616 mycols += locrowlens[i]; 3617 vals += locrowlens[i]; 3618 jj++; 3619 } 3620 /* read in other processors (except the last one) and ship out */ 3621 for (i=1; i<size-1; i++) { 3622 nz = procsnz[i]; 3623 vals = buf; 3624 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3625 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3626 } 3627 /* the last proc */ 3628 if (size != 1) { 3629 nz = procsnz[i] - extra_rows; 3630 vals = buf; 3631 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3632 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3633 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3634 } 3635 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3636 } else { 3637 /* receive numeric values */ 3638 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 3639 3640 /* receive message of values*/ 3641 vals = buf; 3642 mycols = ibuf; 3643 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3644 3645 /* insert into matrix */ 3646 jj = rstart*bs; 3647 for (i=0; i<m; i++) { 3648 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3649 mycols += locrowlens[i]; 3650 vals += locrowlens[i]; 3651 jj++; 3652 } 3653 } 3654 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3655 ierr = PetscFree(buf);CHKERRQ(ierr); 3656 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3657 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3658 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3659 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3660 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3661 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3662 PetscFunctionReturn(0); 3663 } 3664 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3667 /*@ 3668 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3669 3670 Input Parameters: 3671 . mat - the matrix 3672 . 
fact - factor 3673 3674 Not Collective, each process can use a different factor 3675 3676 Level: advanced 3677 3678 Notes: 3679 This can also be set by the command line option: -mat_use_hash_table <fact> 3680 3681 .keywords: matrix, hashtable, factor, HT 3682 3683 .seealso: MatSetOption() 3684 @*/ 3685 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3686 { 3687 PetscErrorCode ierr; 3688 3689 PetscFunctionBegin; 3690 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3691 PetscFunctionReturn(0); 3692 } 3693 3694 #undef __FUNCT__ 3695 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3696 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3697 { 3698 Mat_MPIBAIJ *baij; 3699 3700 PetscFunctionBegin; 3701 baij = (Mat_MPIBAIJ*)mat->data; 3702 baij->ht_fact = fact; 3703 PetscFunctionReturn(0); 3704 } 3705 3706 #undef __FUNCT__ 3707 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3708 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3709 { 3710 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3711 3712 PetscFunctionBegin; 3713 if (Ad) *Ad = a->A; 3714 if (Ao) *Ao = a->B; 3715 if (colmap) *colmap = a->garray; 3716 PetscFunctionReturn(0); 3717 } 3718 3719 /* 3720 Special version for direct calls from Fortran (to eliminate two function call overheads) 3721 */ 3722 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3723 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3724 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3725 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3726 #endif 3727 3728 #undef __FUNCT__ 3729 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3730 /*@C 3731 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3732 3733 Collective on Mat 3734 3735 Input Parameters: 3736 + mat - the matrix 3737 . min - number of input rows 3738 . im - input rows 3739 . nin - number of input columns 3740 . in - input columns 3741 . v - numerical values input 3742 - addvin - INSERT_VALUES or ADD_VALUES 3743 3744 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3745 3746 Level: advanced 3747 3748 .seealso: MatSetValuesBlocked() 3749 @*/ 3750 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3751 { 3752 /* convert input arguments to C version */ 3753 Mat mat = *matin; 3754 PetscInt m = *min, n = *nin; 3755 InsertMode addv = *addvin; 3756 3757 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3758 const MatScalar *value; 3759 MatScalar *barray = baij->barray; 3760 PetscBool roworiented = baij->roworiented; 3761 PetscErrorCode ierr; 3762 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3763 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3764 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3765 3766 PetscFunctionBegin; 3767 /* tasks normally handled by MatSetValuesBlocked() */ 3768 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3769 #if defined(PETSC_USE_DEBUG) 3770 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3771 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3772 #endif 3773 if (mat->assembled) { 3774 mat->was_assembled = PETSC_TRUE; 3775 mat->assembled = PETSC_FALSE; 3776 } 3777 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3778 3779 3780 if (!barray) { 3781 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3782 baij->barray = barray; 3783 } 3784 3785 if (roworiented) stepval = (n-1)*bs; 3786 else stepval = (m-1)*bs; 3787 3788 for (i=0; i<m; i++) { 3789 if (im[i] < 0) continue; 3790 #if defined(PETSC_USE_DEBUG) 3791 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3792 #endif 3793 if (im[i] >= rstart && im[i] < rend) { 3794 row = im[i] - rstart; 3795 for (j=0; j<n; j++) { 3796 /* If NumCol = 1 then a copy is not required */ 3797 if ((roworiented) && (n == 1)) { 3798 barray = (MatScalar*)v + i*bs2; 3799 } else if ((!roworiented) && (m == 1)) { 3800 barray = (MatScalar*)v + j*bs2; 3801 } else { /* Here a copy is required */ 3802 if (roworiented) { 3803 value = v + i*(stepval+bs)*bs + j*bs; 3804 } else { 3805 value = v + j*(stepval+bs)*bs + i*bs; 3806 } 3807 for (ii=0; ii<bs; ii++,value+=stepval) { 3808 for (jj=0; jj<bs; jj++) { 3809 *barray++ = *value++; 3810 } 3811 } 3812 barray -=bs2; 3813 } 3814 3815 if (in[j] >= cstart && in[j] < cend) { 3816 col = in[j] - cstart; 3817 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3818 } else if (in[j] < 0) continue; 3819 #if defined(PETSC_USE_DEBUG) 3820 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3821 #endif 3822 else { 3823 if (mat->was_assembled) { 3824 if (!baij->colmap) { 3825 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3826 } 3827 3828 #if defined(PETSC_USE_DEBUG) 3829 #if defined(PETSC_USE_CTABLE) 3830 { PetscInt data; 3831 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3832 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3833 } 3834 #else 3835 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3836 #endif 3837 #endif 3838 #if defined(PETSC_USE_CTABLE) 3839 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3840 col = (col - 1)/bs; 3841 #else 3842 col = (baij->colmap[in[j]] - 1)/bs; 3843 
#endif 3844 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3845 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3846 col = in[j]; 3847 } 3848 } else col = in[j]; 3849 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3850 } 3851 } 3852 } else { 3853 if (!baij->donotstash) { 3854 if (roworiented) { 3855 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3856 } else { 3857 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3858 } 3859 } 3860 } 3861 } 3862 3863 /* task normally handled by MatSetValuesBlocked() */ 3864 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3865 PetscFunctionReturn(0); 3866 } 3867 3868 #undef __FUNCT__ 3869 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3870 /*@ 3871 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3872 CSR format the local rows. 3873 3874 Collective on MPI_Comm 3875 3876 Input Parameters: 3877 + comm - MPI communicator 3878 . bs - the block size, only a block size of 1 is supported 3879 . m - number of local rows (Cannot be PETSC_DECIDE) 3880 . n - This value should be the same as the local size used in creating the 3881 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3882 calculated if N is given) For square matrices n is almost always m. 3883 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3884 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3885 . i - row indices 3886 . j - column indices 3887 - a - matrix values 3888 3889 Output Parameter: 3890 . mat - the matrix 3891 3892 Level: intermediate 3893 3894 Notes: 3895 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3896 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3897 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3898 3899 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3900 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3901 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3902 with column-major ordering within blocks. 3903 3904 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
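   As an illustration of the block ordering described above (an editorial sketch, not part of the
   original manual page): with bs=2, two consecutive blocks

.vb
      B0 = [ 1 0 ]    B1 = [ 4 7 ]
           [ 2 3 ]         [ 8 9 ]
.ve

   contribute the values a = {1,2,0,3, 4,8,7,9}, i.e. each block is stored column by column before
   the next block begins.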
3905 3906 .keywords: matrix, aij, compressed row, sparse, parallel 3907 3908 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3909 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3910 @*/ 3911 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3912 { 3913 PetscErrorCode ierr; 3914 3915 PetscFunctionBegin; 3916 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3917 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3918 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3919 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3920 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3921 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3922 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3923 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3924 PetscFunctionReturn(0); 3925 } 3926 3927 #undef __FUNCT__ 3928 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ" 3929 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3930 { 3931 PetscErrorCode ierr; 3932 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3933 PetscInt *indx; 3934 PetscScalar *values; 3935 3936 PetscFunctionBegin; 3937 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3938 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3939 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3940 PetscInt *dnz,*onz,sum,mbs,Nbs; 3941 PetscInt *bindx,rmax=a->rmax,j; 3942 3943 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3944 mbs = m/bs; Nbs = N/cbs; 3945 if (n == PETSC_DECIDE) { 3946 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3947 } 3948 /* Check sum(n) = Nbs */ 3949 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3950 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3951 3952 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3953 rstart -= mbs; 3954 3955 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3956 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3957 for (i=0; i<mbs; i++) { 3958 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3959 nnz = nnz/bs; 3960 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3961 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3962 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3963 } 3964 ierr = PetscFree(bindx);CHKERRQ(ierr); 3965 3966 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3967 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3968 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3969 ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr); 3970 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3971 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3972 } 3973 3974 /* numeric phase */ 3975 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3976 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3977 3978 for (i=0; i<m; i++) { 3979 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3980 Ii = i + rstart; 3981 ierr =
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3982 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3983 } 3984 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3985 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3986 PetscFunctionReturn(0); 3987 } 3988
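/*
   Editorial usage sketch (not part of the original source): MatCreateMPIMatConcatenateSeqMat_MPIBAIJ()
   appears to be the MPIBAIJ implementation used by the public MatCreateMPIMatConcatenateSeqMat(),
   which stacks the sequential matrix supplied by each rank on top of the others in rank order.
   Assuming every rank has assembled a MATSEQBAIJ matrix seqA with the full global number of columns,
   a caller might write

       Mat mpiA;
       ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);

   With PETSC_DECIDE for the local column count the layout is chosen by PetscSplitOwnership(), as in
   the symbolic phase above.
*/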