#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
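
     With block size bs, colmap is indexed by global block column: if a global
   block column gbcol appears in the off-diagonal part B as local block column i
   (so garray[i] == gbcol), then colmap[gbcol] holds i*bs+1 (or, with
   PETSC_USE_CTABLE, the table maps gbcol+1 to i*bs+1); a stored value of 0 means
   that block column is not present on this process.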
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool         roworiented  = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt       h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
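  /* Only locally owned rows can be retrieved; columns outside the diagonal block are
     located in B through colmap/garray, and entries not stored locally are returned as 0.0 */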
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
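      /* sums[] accumulates the absolute row sums of the bs rows within the current
         block row over both A and B; the local maximum is then reduced with MPIU_MAX */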
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
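  /* j counts the occupied slots and ct the linear-probe collisions, so (ct+j)/j is the
     average number of probes needed to place an entry in the table */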
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
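    /* rank 0 writes its own values, then receives and writes each other process's values
       in rank order; the flow control limits the number of outstanding messages */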
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&mat->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
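        (max*bs2 scalars and max*bs2 indices, where max is the largest number of block
        entries in any block row of A and B combined)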
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
isend[4] += info->mallocs; 1547 1548 if (flag == MAT_LOCAL) { 1549 info->nz_used = isend[0]; 1550 info->nz_allocated = isend[1]; 1551 info->nz_unneeded = isend[2]; 1552 info->memory = isend[3]; 1553 info->mallocs = isend[4]; 1554 } else if (flag == MAT_GLOBAL_MAX) { 1555 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1556 1557 info->nz_used = irecv[0]; 1558 info->nz_allocated = irecv[1]; 1559 info->nz_unneeded = irecv[2]; 1560 info->memory = irecv[3]; 1561 info->mallocs = irecv[4]; 1562 } else if (flag == MAT_GLOBAL_SUM) { 1563 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1564 1565 info->nz_used = irecv[0]; 1566 info->nz_allocated = irecv[1]; 1567 info->nz_unneeded = irecv[2]; 1568 info->memory = irecv[3]; 1569 info->mallocs = irecv[4]; 1570 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1571 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1572 info->fill_ratio_needed = 0; 1573 info->factor_mallocs = 0; 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1579 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1580 { 1581 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 switch (op) { 1586 case MAT_NEW_NONZERO_LOCATIONS: 1587 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1588 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1589 case MAT_KEEP_NONZERO_PATTERN: 1590 case MAT_NEW_NONZERO_LOCATION_ERR: 1591 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1592 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1593 break; 1594 case MAT_ROW_ORIENTED: 1595 a->roworiented = flg; 1596 1597 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1598 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1599 break; 1600 case MAT_NEW_DIAGONALS: 1601 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1602 break; 1603 case MAT_IGNORE_OFF_PROC_ENTRIES: 1604 a->donotstash = flg; 1605 break; 1606 case MAT_USE_HASH_TABLE: 1607 a->ht_flag = flg; 1608 break; 1609 case MAT_SYMMETRIC: 1610 case MAT_STRUCTURALLY_SYMMETRIC: 1611 case MAT_HERMITIAN: 1612 case MAT_SYMMETRY_ETERNAL: 1613 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1614 break; 1615 default: 1616 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1617 } 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1623 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1624 { 1625 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1626 Mat_SeqBAIJ *Aloc; 1627 Mat B; 1628 PetscErrorCode ierr; 1629 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1630 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1631 MatScalar *a; 1632 1633 PetscFunctionBegin; 1634 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1635 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1636 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1637 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1638 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1639 /* Do not know preallocation information, but must set block size */ 1640 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1641 } else { 
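    /* MAT_REUSE_MATRIX with a caller-supplied matrix different from A: fill that matrix in place */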
1642 B = *matout; 1643 } 1644 1645 /* copy over the A part */ 1646 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1647 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1648 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1649 1650 for (i=0; i<mbs; i++) { 1651 rvals[0] = bs*(baij->rstartbs + i); 1652 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1653 for (j=ai[i]; j<ai[i+1]; j++) { 1654 col = (baij->cstartbs+aj[j])*bs; 1655 for (k=0; k<bs; k++) { 1656 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1657 1658 col++; a += bs; 1659 } 1660 } 1661 } 1662 /* copy over the B part */ 1663 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1664 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1665 for (i=0; i<mbs; i++) { 1666 rvals[0] = bs*(baij->rstartbs + i); 1667 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1668 for (j=ai[i]; j<ai[i+1]; j++) { 1669 col = baij->garray[aj[j]]*bs; 1670 for (k=0; k<bs; k++) { 1671 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1672 col++; 1673 a += bs; 1674 } 1675 } 1676 } 1677 ierr = PetscFree(rvals);CHKERRQ(ierr); 1678 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1679 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 1681 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1682 else { 1683 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1690 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1691 { 1692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1693 Mat a = baij->A,b = baij->B; 1694 PetscErrorCode ierr; 1695 PetscInt s1,s2,s3; 1696 1697 PetscFunctionBegin; 1698 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1699 if (rr) { 1700 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1701 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1702 /* Overlap communication with computation. 
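       The forward scatter of rr into the ghosted work vector lvec is started here; the left scaling
       of the off-diagonal block and the scaling of the diagonal block proceed while the messages are
       in flight, and the scatter is completed only when the ghost values are needed to right-scale
       the off-diagonal block.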
*/ 1703 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1704 } 1705 if (ll) { 1706 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1707 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1708 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1709 } 1710 /* scale the diagonal block */ 1711 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1712 1713 if (rr) { 1714 /* Do a scatter end and then right scale the off-diagonal block */ 1715 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1716 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1723 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1724 { 1725 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1726 PetscInt *owners = A->rmap->range; 1727 PetscInt n = A->rmap->n; 1728 PetscSF sf; 1729 PetscInt *lrows; 1730 PetscSFNode *rrows; 1731 PetscInt r, p = 0, len = 0; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 /* Create SF where leaves are input rows and roots are owned rows */ 1736 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1737 for (r = 0; r < n; ++r) lrows[r] = -1; 1738 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1739 for (r = 0; r < N; ++r) { 1740 const PetscInt idx = rows[r]; 1741 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1742 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1743 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1744 } 1745 if (A->nooffproczerorows) { 1746 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1747 lrows[len++] = idx - owners[p]; 1748 } else { 1749 rrows[r].rank = p; 1750 rrows[r].index = rows[r] - owners[p]; 1751 } 1752 } 1753 if (!A->nooffproczerorows) { 1754 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1755 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1756 /* Collect flags for rows to be zeroed */ 1757 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1758 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1759 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1760 /* Compress and put in row numbers */ 1761 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1762 } 1763 /* fix right hand side if needed */ 1764 if (x && b) { 1765 const PetscScalar *xx; 1766 PetscScalar *bb; 1767 1768 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1769 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1770 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1771 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1772 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1773 } 1774 1775 /* actually zap the local rows */ 1776 /* 1777 Zero the required rows. If the "diagonal block" of the matrix 1778 is square and the user wishes to set the diagonal we use separate 1779 code so that MatSetValues() is not called for each diagonal allocating 1780 new memory, thus calling lots of mallocs and slowing things down. 
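     In the rectangular case (where a diagonal value still has to be placed) the rows are first
     zeroed and the diagonal entries are then inserted one at a time with MatSetValues(), followed
     by a reassembly; that path requires that new nonzero locations be allowed.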
1781 1782 */ 1783 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1784 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1785 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1786 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1787 } else if (diag != 0.0) { 1788 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1789 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1790 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1791 for (r = 0; r < len; ++r) { 1792 const PetscInt row = lrows[r] + A->rmap->rstart; 1793 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1799 } 1800 ierr = PetscFree(lrows);CHKERRQ(ierr); 1801 1802 /* only change matrix nonzero state if pattern was allowed to be changed */ 1803 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1804 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1805 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1812 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1813 { 1814 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1815 PetscErrorCode ierr; 1816 PetscMPIInt n = A->rmap->n; 1817 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1818 PetscInt *lrows,*owners = A->rmap->range; 1819 PetscSFNode *rrows; 1820 PetscSF sf; 1821 const PetscScalar *xx; 1822 PetscScalar *bb,*mask; 1823 Vec xmask,lmask; 1824 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1825 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1826 PetscScalar *aa; 1827 1828 PetscFunctionBegin; 1829 /* Create SF where leaves are input rows and roots are owned rows */ 1830 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1831 for (r = 0; r < n; ++r) lrows[r] = -1; 1832 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1833 for (r = 0; r < N; ++r) { 1834 const PetscInt idx = rows[r]; 1835 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1836 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1837 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1838 } 1839 rrows[r].rank = p; 1840 rrows[r].index = rows[r] - owners[p]; 1841 } 1842 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1843 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1844 /* Collect flags for rows to be zeroed */ 1845 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1846 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1847 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1848 /* Compress and put in row numbers */ 1849 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1850 /* zero diagonal part of matrix */ 1851 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1852 /* 
handle off diagonal part of matrix */ 1853 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1854 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1855 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1856 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1857 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1858 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1860 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1861 if (x) { 1862 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1863 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1864 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1865 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1866 } 1867 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1868 /* remove zeroed rows of off diagonal matrix */ 1869 for (i = 0; i < len; ++i) { 1870 row = lrows[i]; 1871 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1872 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1873 for (k = 0; k < count; ++k) { 1874 aa[0] = 0.0; 1875 aa += bs; 1876 } 1877 } 1878 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1879 for (i = 0; i < l->B->rmap->N; ++i) { 1880 row = i/bs; 1881 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1882 for (k = 0; k < bs; ++k) { 1883 col = bs*baij->j[j] + k; 1884 if (PetscAbsScalar(mask[col])) { 1885 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1886 if (b) bb[i] -= aa[0]*xx[col]; 1887 aa[0] = 0.0; 1888 } 1889 } 1890 } 1891 } 1892 if (x) { 1893 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1894 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1895 } 1896 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1897 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1898 ierr = PetscFree(lrows);CHKERRQ(ierr); 1899 1900 /* only change matrix nonzero state if pattern was allowed to be changed */ 1901 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1902 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1903 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1910 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1911 { 1912 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1917 PetscFunctionReturn(0); 1918 } 1919 1920 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatEqual_MPIBAIJ" 1924 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1925 { 1926 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1927 Mat a,b,c,d; 1928 PetscBool flg; 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 a = matA->A; b = matA->B; 1933 c = matB->A; d = matB->B; 1934 1935 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1936 if (flg) { 1937 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1938 } 1939 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatCopy_MPIBAIJ" 1945 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1946 { 1947 PetscErrorCode ierr; 1948 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 
1949 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1950 1951 PetscFunctionBegin; 1952 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1953 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1954 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1955 } else { 1956 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1957 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1958 } 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1964 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1965 { 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 1975 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1976 { 1977 PetscErrorCode ierr; 1978 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1979 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1980 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1981 1982 PetscFunctionBegin; 1983 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1989 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1990 { 1991 PetscErrorCode ierr; 1992 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1993 PetscBLASInt bnz,one=1; 1994 Mat_SeqBAIJ *x,*y; 1995 1996 PetscFunctionBegin; 1997 if (str == SAME_NONZERO_PATTERN) { 1998 PetscScalar alpha = a; 1999 x = (Mat_SeqBAIJ*)xx->A->data; 2000 y = (Mat_SeqBAIJ*)yy->A->data; 2001 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2002 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2003 x = (Mat_SeqBAIJ*)xx->B->data; 2004 y = (Mat_SeqBAIJ*)yy->B->data; 2005 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2006 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2007 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2008 } else { 2009 Mat B; 2010 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2011 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2012 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2013 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2014 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2015 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2016 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2017 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2018 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2019 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2020 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2021 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
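     When the two matrices share the same nonzero pattern the sum is instead applied above directly
     to the stored arrays of the diagonal and off-diagonal blocks with BLASaxpy(); in the branch
     below a new matrix B with the union of the two patterns is preallocated, the sum is computed
     into it by MatAXPY_BasicWithPreallocation(), and the result replaces Y via MatHeaderReplace().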
*/ 2022 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2023 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2024 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2025 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 #undef __FUNCT__ 2031 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2032 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2033 { 2034 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2039 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 #undef __FUNCT__ 2044 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2045 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2046 { 2047 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2048 PetscErrorCode ierr; 2049 2050 PetscFunctionBegin; 2051 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2052 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2058 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2059 { 2060 PetscErrorCode ierr; 2061 IS iscol_local; 2062 PetscInt csize; 2063 2064 PetscFunctionBegin; 2065 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2066 if (call == MAT_REUSE_MATRIX) { 2067 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2068 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2069 } else { 2070 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2071 } 2072 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2073 if (call == MAT_INITIAL_MATRIX) { 2074 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2075 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2076 } 2077 PetscFunctionReturn(0); 2078 } 2079 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2080 #undef __FUNCT__ 2081 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2082 /* 2083 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2084 in local and then by concatenating the local matrices the end result. 2085 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2086 */ 2087 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2088 { 2089 PetscErrorCode ierr; 2090 PetscMPIInt rank,size; 2091 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2092 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2093 Mat M,Mreuse; 2094 MatScalar *vwork,*aa; 2095 MPI_Comm comm; 2096 IS isrow_new, iscol_new; 2097 PetscBool idflag,allrows, allcols; 2098 Mat_SeqBAIJ *aij; 2099 2100 PetscFunctionBegin; 2101 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2102 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2103 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2104 /* The compression and expansion should be avoided. 
Doesn't point 2105 out errors, might change the indices, hence buggey */ 2106 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2107 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2108 2109 /* Check for special case: each processor gets entire matrix columns */ 2110 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2111 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2112 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2113 else allcols = PETSC_FALSE; 2114 2115 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2116 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2117 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2118 else allrows = PETSC_FALSE; 2119 2120 if (call == MAT_REUSE_MATRIX) { 2121 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2122 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2123 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2124 } else { 2125 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2126 } 2127 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2128 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2129 /* 2130 m - number of local rows 2131 n - number of columns (same on all processors) 2132 rstart - first row in new global matrix generated 2133 */ 2134 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2135 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2136 m = m/bs; 2137 n = n/bs; 2138 2139 if (call == MAT_INITIAL_MATRIX) { 2140 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2141 ii = aij->i; 2142 jj = aij->j; 2143 2144 /* 2145 Determine the number of non-zeros in the diagonal and off-diagonal 2146 portions of the matrix in order to do correct preallocation 2147 */ 2148 2149 /* first get start and end of "diagonal" columns */ 2150 if (csize == PETSC_DECIDE) { 2151 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2152 if (mglobal == n*bs) { /* square matrix */ 2153 nlocal = m; 2154 } else { 2155 nlocal = n/size + ((n % size) > rank); 2156 } 2157 } else { 2158 nlocal = csize/bs; 2159 } 2160 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2161 rstart = rend - nlocal; 2162 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2163 2164 /* next, compute all the lengths */ 2165 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2166 for (i=0; i<m; i++) { 2167 jend = ii[i+1] - ii[i]; 2168 olen = 0; 2169 dlen = 0; 2170 for (j=0; j<jend; j++) { 2171 if (*jj < rstart || *jj >= rend) olen++; 2172 else dlen++; 2173 jj++; 2174 } 2175 olens[i] = olen; 2176 dlens[i] = dlen; 2177 } 2178 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2179 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2180 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2181 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2182 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2183 } else { 2184 PetscInt ml,nl; 2185 2186 M = *newmat; 2187 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2188 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2189 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2190 
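    /* clear the values kept from the previous call; the entries of the newly extracted submatrix are reinserted below with INSERT_VALUES */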
/* 2191 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2192 rather than the slower MatSetValues(). 2193 */ 2194 M->was_assembled = PETSC_TRUE; 2195 M->assembled = PETSC_FALSE; 2196 } 2197 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2198 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2199 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2200 ii = aij->i; 2201 jj = aij->j; 2202 aa = aij->a; 2203 for (i=0; i<m; i++) { 2204 row = rstart/bs + i; 2205 nz = ii[i+1] - ii[i]; 2206 cwork = jj; jj += nz; 2207 vwork = aa; aa += nz*bs*bs; 2208 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2209 } 2210 2211 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2212 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2213 *newmat = M; 2214 2215 /* save submatrix used in processor for next request */ 2216 if (call == MAT_INITIAL_MATRIX) { 2217 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2218 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2219 } 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatPermute_MPIBAIJ" 2225 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2226 { 2227 MPI_Comm comm,pcomm; 2228 PetscInt clocal_size,nrows; 2229 const PetscInt *rows; 2230 PetscMPIInt size; 2231 IS crowp,lcolp; 2232 PetscErrorCode ierr; 2233 2234 PetscFunctionBegin; 2235 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2236 /* make a collective version of 'rowp' */ 2237 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2238 if (pcomm==comm) { 2239 crowp = rowp; 2240 } else { 2241 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2242 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2243 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2244 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2245 } 2246 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2247 /* make a local version of 'colp' */ 2248 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2249 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2250 if (size==1) { 2251 lcolp = colp; 2252 } else { 2253 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2254 } 2255 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2256 /* now we just get the submatrix */ 2257 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2258 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2259 /* clean up */ 2260 if (pcomm!=comm) { 2261 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2262 } 2263 if (size>1) { 2264 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2265 } 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2271 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2272 { 2273 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2274 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2275 2276 PetscFunctionBegin; 2277 if (nghosts) *nghosts = B->nbs; 2278 if (ghosts) *ghosts = baij->garray; 2279 PetscFunctionReturn(0); 2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2284 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2285 { 2286 Mat B; 2287 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2288 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2289 Mat_SeqAIJ *b; 2290 PetscErrorCode ierr; 2291 
PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2292 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2293 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2294 2295 PetscFunctionBegin; 2296 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2297 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2298 2299 /* ---------------------------------------------------------------- 2300 Tell every processor the number of nonzeros per row 2301 */ 2302 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2303 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2304 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2305 } 2306 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2307 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2308 displs = recvcounts + size; 2309 for (i=0; i<size; i++) { 2310 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2311 displs[i] = A->rmap->range[i]/bs; 2312 } 2313 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2314 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2315 #else 2316 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2317 #endif 2318 /* --------------------------------------------------------------- 2319 Create the sequential matrix of the same type as the local block diagonal 2320 */ 2321 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2322 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2323 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2324 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2325 b = (Mat_SeqAIJ*)B->data; 2326 2327 /*-------------------------------------------------------------------- 2328 Copy my part of matrix column indices over 2329 */ 2330 sendcount = ad->nz + bd->nz; 2331 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2332 a_jsendbuf = ad->j; 2333 b_jsendbuf = bd->j; 2334 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2335 cnt = 0; 2336 for (i=0; i<n; i++) { 2337 2338 /* put in lower diagonal portion */ 2339 m = bd->i[i+1] - bd->i[i]; 2340 while (m > 0) { 2341 /* is it above diagonal (in bd (compressed) numbering) */ 2342 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2343 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2344 m--; 2345 } 2346 2347 /* put in diagonal portion */ 2348 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2349 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2350 } 2351 2352 /* put in upper diagonal portion */ 2353 while (m-- > 0) { 2354 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2355 } 2356 } 2357 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2358 2359 /*-------------------------------------------------------------------- 2360 Gather all column indices to all processors 2361 */ 2362 for (i=0; i<size; i++) { 2363 recvcounts[i] = 0; 2364 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2365 recvcounts[i] += lens[j]; 2366 } 2367 } 2368 displs[0] = 0; 2369 for (i=1; i<size; i++) { 2370 displs[i] = displs[i-1] + recvcounts[i-1]; 2371 } 2372 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2373 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2374 #else 2375 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2376 #endif 2377 /*-------------------------------------------------------------------- 2378 Assemble the matrix into useable form (note numerical values not yet set) 2379 */ 2380 /* set the b->ilen (length of each row) values */ 2381 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2382 /* set the b->i indices */ 2383 b->i[0] = 0; 2384 for (i=1; i<=A->rmap->N/bs; i++) { 2385 b->i[i] = b->i[i-1] + lens[i-1]; 2386 } 2387 ierr = PetscFree(lens);CHKERRQ(ierr); 2388 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2389 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2390 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2391 2392 if (A->symmetric) { 2393 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2394 } else if (A->hermitian) { 2395 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2396 } else if (A->structurally_symmetric) { 2397 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2398 } 2399 *newmat = B; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatSOR_MPIBAIJ" 2405 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2406 { 2407 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2408 PetscErrorCode ierr; 2409 Vec bb1 = 0; 2410 2411 PetscFunctionBegin; 2412 if (flag == SOR_APPLY_UPPER) { 2413 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2418 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2419 } 2420 2421 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2422 if (flag & SOR_ZERO_INITIAL_GUESS) { 2423 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2424 its--; 2425 } 2426 2427 while (its--) { 2428 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2429 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2430 2431 /* update rhs: bb1 = bb - B*x */ 2432 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2433 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2434 2435 /* local sweep */ 2436 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2437 } 2438 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2439 if (flag & SOR_ZERO_INITIAL_GUESS) { 2440 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2441 its--; 2442 } 2443 while (its--) { 2444 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2445 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2446 2447 /* update rhs: bb1 = bb - B*x */ 2448 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2449 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2450 2451 /* local sweep */ 2452 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2453 } 2454 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2455 if (flag & SOR_ZERO_INITIAL_GUESS) { 2456 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2457 its--; 2458 } 2459 while (its--) { 2460 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2461 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2462 2463 /* update rhs: bb1 = bb - B*x */ 2464 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2465 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2466 2467 /* local sweep */ 2468 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2469 } 2470 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2471 2472 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 #undef __FUNCT__ 2477 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2478 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2479 { 2480 PetscErrorCode ierr; 2481 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2482 PetscInt N,i,*garray = aij->garray; 2483 PetscInt ib,jb,bs = A->rmap->bs; 2484 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2485 MatScalar *a_val = a_aij->a; 2486 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2487 MatScalar *b_val = b_aij->a; 2488 PetscReal *work; 2489 2490 PetscFunctionBegin; 2491 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2492 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2493 if (type == NORM_2) { 2494 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2495 for (jb=0; jb<bs; jb++) { 2496 for (ib=0; ib<bs; ib++) { 2497 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2498 a_val++; 2499 } 2500 } 2501 } 2502 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2503 for (jb=0; jb<bs; jb++) { 2504 for (ib=0; ib<bs; ib++) { 2505 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2506 b_val++; 2507 } 2508 } 2509 } 2510 } else if (type == NORM_1) { 2511 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2512 for (jb=0; jb<bs; jb++) { 2513 for (ib=0; ib<bs; ib++) { 2514 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2515 a_val++; 2516 } 2517 } 2518 } 2519 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2520 for (jb=0; jb<bs; jb++) { 2521 for (ib=0; ib<bs; ib++) { 2522 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2523 b_val++; 2524 } 2525 } 2526 } 2527 } else if (type == NORM_INFINITY) { 2528 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2529 for (jb=0; jb<bs; jb++) { 2530 for (ib=0; ib<bs; ib++) { 2531 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2532 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2533 a_val++; 2534 } 2535 } 2536 } 2537 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2538 for (jb=0; jb<bs; jb++) { 2539 for (ib=0; ib<bs; ib++) { 2540 int col = garray[b_aij->j[i]] * bs + jb; 2541 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2542 b_val++; 2543 } 2544 } 2545 } 2546 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2547 if (type == NORM_INFINITY) { 2548 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2549 } else { 2550 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2551 } 2552 ierr = PetscFree(work);CHKERRQ(ierr); 2553 if (type == NORM_2) { 2554 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2555 } 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2561 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2562 { 2563 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 2572 /* -------------------------------------------------------------------*/ 2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2574 MatGetRow_MPIBAIJ, 2575 MatRestoreRow_MPIBAIJ, 2576 MatMult_MPIBAIJ, 2577 /* 4*/ MatMultAdd_MPIBAIJ, 2578 MatMultTranspose_MPIBAIJ, 2579 MatMultTransposeAdd_MPIBAIJ, 2580 0, 2581 0, 2582 0, 2583 /*10*/ 0, 2584 0, 2585 0, 2586 MatSOR_MPIBAIJ, 2587 MatTranspose_MPIBAIJ, 2588 /*15*/ MatGetInfo_MPIBAIJ, 2589 MatEqual_MPIBAIJ, 2590 MatGetDiagonal_MPIBAIJ, 2591 MatDiagonalScale_MPIBAIJ, 2592 MatNorm_MPIBAIJ, 2593 /*20*/ MatAssemblyBegin_MPIBAIJ, 2594 MatAssemblyEnd_MPIBAIJ, 2595 MatSetOption_MPIBAIJ, 2596 MatZeroEntries_MPIBAIJ, 2597 /*24*/ MatZeroRows_MPIBAIJ, 2598 0, 2599 0, 2600 0, 2601 0, 2602 /*29*/ MatSetUp_MPIBAIJ, 2603 0, 2604 0, 2605 0, 2606 0, 2607 /*34*/ MatDuplicate_MPIBAIJ, 2608 0, 2609 0, 2610 0, 2611 0, 2612 /*39*/ MatAXPY_MPIBAIJ, 2613 MatGetSubMatrices_MPIBAIJ, 2614 MatIncreaseOverlap_MPIBAIJ, 2615 MatGetValues_MPIBAIJ, 2616 MatCopy_MPIBAIJ, 2617 /*44*/ 0, 2618 MatScale_MPIBAIJ, 2619 0, 2620 0, 2621 MatZeroRowsColumns_MPIBAIJ, 2622 /*49*/ 0, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /*54*/ MatFDColoringCreate_MPIXAIJ, 2628 0, 2629 MatSetUnfactored_MPIBAIJ, 2630 MatPermute_MPIBAIJ, 2631 MatSetValuesBlocked_MPIBAIJ, 2632 /*59*/ MatGetSubMatrix_MPIBAIJ, 2633 MatDestroy_MPIBAIJ, 2634 MatView_MPIBAIJ, 2635 0, 2636 0, 2637 /*64*/ 0, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /*74*/ 0, 2648 MatFDColoringApply_BAIJ, 2649 0, 2650 0, 2651 0, 2652 /*79*/ 0, 2653 0, 2654 0, 2655 0, 2656 MatLoad_MPIBAIJ, 2657 /*84*/ 0, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*89*/ 0, 2663 0, 2664 0, 2665 0, 2666 0, 2667 /*94*/ 0, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*99*/ 0, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*104*/0, 2678 MatRealPart_MPIBAIJ, 2679 MatImaginaryPart_MPIBAIJ, 2680 0, 2681 0, 2682 /*109*/0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2688 0, 2689 MatGetGhosts_MPIBAIJ, 2690 0, 2691 0, 2692 /*119*/0, 2693 0, 2694 0, 2695 0, 2696 MatGetMultiProcBlock_MPIBAIJ, 2697 /*124*/0, 2698 MatGetColumnNorms_MPIBAIJ, 2699 MatInvertBlockDiagonal_MPIBAIJ, 2700 0, 2701 0, 2702 /*129*/ 0, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /*134*/ 0, 2708 0, 2709 0, 2710 0, 2711 0, 2712 /*139*/ 0, 2713 0, 2714 0, 2715 MatFDColoringSetUp_MPIXAIJ, 2716 0, 2717 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2718 }; 2719 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2722 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2723 { 2724 PetscFunctionBegin; 2725 *a = ((Mat_MPIBAIJ*)A->data)->A; 2726 PetscFunctionReturn(0); 2727 } 2728 2729 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2733 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2734 { 2735 PetscInt m,rstart,cstart,cend; 2736 PetscInt 
i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2737 const PetscInt *JJ =0; 2738 PetscScalar *values=0; 2739 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2744 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2745 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2746 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2747 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2748 m = B->rmap->n/bs; 2749 rstart = B->rmap->rstart/bs; 2750 cstart = B->cmap->rstart/bs; 2751 cend = B->cmap->rend/bs; 2752 2753 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2754 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2755 for (i=0; i<m; i++) { 2756 nz = ii[i+1] - ii[i]; 2757 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2758 nz_max = PetscMax(nz_max,nz); 2759 JJ = jj + ii[i]; 2760 for (j=0; j<nz; j++) { 2761 if (*JJ >= cstart) break; 2762 JJ++; 2763 } 2764 d = 0; 2765 for (; j<nz; j++) { 2766 if (*JJ++ >= cend) break; 2767 d++; 2768 } 2769 d_nnz[i] = d; 2770 o_nnz[i] = nz - d; 2771 } 2772 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2773 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2774 2775 values = (PetscScalar*)V; 2776 if (!values) { 2777 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2778 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2779 } 2780 for (i=0; i<m; i++) { 2781 PetscInt row = i + rstart; 2782 PetscInt ncols = ii[i+1] - ii[i]; 2783 const PetscInt *icols = jj + ii[i]; 2784 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2785 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2786 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2787 } else { /* block ordering does not match so we can only insert one block at a time. */ 2788 PetscInt j; 2789 for (j=0; j<ncols; j++) { 2790 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2791 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2792 } 2793 } 2794 } 2795 2796 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2797 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2805 /*@C 2806 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2807 (the default parallel PETSc format). 2808 2809 Collective on MPI_Comm 2810 2811 Input Parameters: 2812 + B - the matrix 2813 . bs - the block size 2814 . i - the indices into j for the start of each local row (starts with zero) 2815 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2816 - v - optional values in the matrix 2817 2818 Level: developer 2819 2820 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 2821 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2822 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2823 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2824 block column and the second index is over columns within a block. 2825 2826 .keywords: matrix, aij, compressed row, sparse, parallel 2827 2828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2829 @*/ 2830 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2831 { 2832 PetscErrorCode ierr; 2833 2834 PetscFunctionBegin; 2835 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2836 PetscValidType(B,1); 2837 PetscValidLogicalCollectiveInt(B,bs,2); 2838 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2839 PetscFunctionReturn(0); 2840 } 2841 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2844 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2845 { 2846 Mat_MPIBAIJ *b; 2847 PetscErrorCode ierr; 2848 PetscInt i; 2849 2850 PetscFunctionBegin; 2851 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2852 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2853 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2854 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2855 2856 if (d_nnz) { 2857 for (i=0; i<B->rmap->n/bs; i++) { 2858 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2859 } 2860 } 2861 if (o_nnz) { 2862 for (i=0; i<B->rmap->n/bs; i++) { 2863 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2864 } 2865 } 2866 2867 b = (Mat_MPIBAIJ*)B->data; 2868 b->bs2 = bs*bs; 2869 b->mbs = B->rmap->n/bs; 2870 b->nbs = B->cmap->n/bs; 2871 b->Mbs = B->rmap->N/bs; 2872 b->Nbs = B->cmap->N/bs; 2873 2874 for (i=0; i<=b->size; i++) { 2875 b->rangebs[i] = B->rmap->range[i]/bs; 2876 } 2877 b->rstartbs = B->rmap->rstart/bs; 2878 b->rendbs = B->rmap->rend/bs; 2879 b->cstartbs = B->cmap->rstart/bs; 2880 b->cendbs = B->cmap->rend/bs; 2881 2882 if (!B->preallocated) { 2883 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2884 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2885 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2887 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2888 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2889 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2891 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2892 } 2893 2894 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2895 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2896 B->preallocated = PETSC_TRUE; 2897 PetscFunctionReturn(0); 2898 } 2899 2900 extern 
PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2901 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2902 2903 #undef __FUNCT__ 2904 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2905 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2906 { 2907 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2908 PetscErrorCode ierr; 2909 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2910 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2911 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2912 2913 PetscFunctionBegin; 2914 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2915 ii[0] = 0; 2916 for (i=0; i<M; i++) { 2917 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2918 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2919 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2920 /* remove one from count of matrix has diagonal */ 2921 for (j=id[i]; j<id[i+1]; j++) { 2922 if (jd[j] == i) {ii[i+1]--;break;} 2923 } 2924 } 2925 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2926 cnt = 0; 2927 for (i=0; i<M; i++) { 2928 for (j=io[i]; j<io[i+1]; j++) { 2929 if (garray[jo[j]] > rstart) break; 2930 jj[cnt++] = garray[jo[j]]; 2931 } 2932 for (k=id[i]; k<id[i+1]; k++) { 2933 if (jd[k] != i) { 2934 jj[cnt++] = rstart + jd[k]; 2935 } 2936 } 2937 for (; j<io[i+1]; j++) { 2938 jj[cnt++] = garray[jo[j]]; 2939 } 2940 } 2941 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2942 PetscFunctionReturn(0); 2943 } 2944 2945 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2946 2947 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2948 2949 #undef __FUNCT__ 2950 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2951 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2952 { 2953 PetscErrorCode ierr; 2954 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2955 Mat B; 2956 Mat_MPIAIJ *b; 2957 2958 PetscFunctionBegin; 2959 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2960 2961 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2962 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2963 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2964 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2965 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2966 b = (Mat_MPIAIJ*) B->data; 2967 2968 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2969 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2970 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2971 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2972 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2973 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2974 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2977 if (reuse == MAT_REUSE_MATRIX) { 2978 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2979 } else { 2980 *newmat = B; 2981 } 2982 PetscFunctionReturn(0); 2983 } 2984 2985 #if defined(PETSC_HAVE_MUMPS) 2986 PETSC_EXTERN 
PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2987 #endif 2988 2989 /*MC 2990 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2991 2992 Options Database Keys: 2993 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2994 . -mat_block_size <bs> - set the blocksize used to store the matrix 2995 - -mat_use_hash_table <fact> 2996 2997 Level: beginner 2998 2999 .seealso: MatCreateMPIBAIJ 3000 M*/ 3001 3002 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3003 3004 #undef __FUNCT__ 3005 #define __FUNCT__ "MatCreate_MPIBAIJ" 3006 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3007 { 3008 Mat_MPIBAIJ *b; 3009 PetscErrorCode ierr; 3010 PetscBool flg; 3011 3012 PetscFunctionBegin; 3013 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3014 B->data = (void*)b; 3015 3016 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3017 B->assembled = PETSC_FALSE; 3018 3019 B->insertmode = NOT_SET_VALUES; 3020 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3021 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3022 3023 /* build local table of row and column ownerships */ 3024 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 3025 3026 /* build cache for off array entries formed */ 3027 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3028 3029 b->donotstash = PETSC_FALSE; 3030 b->colmap = NULL; 3031 b->garray = NULL; 3032 b->roworiented = PETSC_TRUE; 3033 3034 /* stuff used in block assembly */ 3035 b->barray = 0; 3036 3037 /* stuff used for matrix vector multiply */ 3038 b->lvec = 0; 3039 b->Mvctx = 0; 3040 3041 /* stuff for MatGetRow() */ 3042 b->rowindices = 0; 3043 b->rowvalues = 0; 3044 b->getrowactive = PETSC_FALSE; 3045 3046 /* hash table stuff */ 3047 b->ht = 0; 3048 b->hd = 0; 3049 b->ht_size = 0; 3050 b->ht_flag = PETSC_FALSE; 3051 b->ht_fact = 0; 3052 b->ht_total_ct = 0; 3053 b->ht_insert_ct = 0; 3054 3055 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3056 b->ijonly = PETSC_FALSE; 3057 3058 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3059 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3060 if (flg) { 3061 PetscReal fact = 1.39; 3062 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3063 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3064 if (fact <= 1.0) fact = 1.39; 3065 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3066 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3067 } 3068 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3069 3070 #if defined(PETSC_HAVE_MUMPS) 3071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3072 #endif 3073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3076 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3084 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3085 PetscFunctionReturn(0); 3086 } 3087 3088 /*MC 3089 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3090 3091 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3092 and MATMPIBAIJ otherwise. 3093 3094 Options Database Keys: 3095 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3096 3097 Level: beginner 3098 3099 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3100 M*/ 3101 3102 #undef __FUNCT__ 3103 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3104 /*@C 3105 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3106 (block compressed row). For good matrix assembly performance 3107 the user should preallocate the matrix storage by setting the parameters 3108 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3109 performance can be increased by more than a factor of 50. 3110 3111 Collective on Mat 3112 3113 Input Parameters: 3114 + B - the matrix 3115 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3116 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3117 . d_nz - number of block nonzeros per block row in diagonal portion of local 3118 submatrix (same for all local rows) 3119 . d_nnz - array containing the number of block nonzeros in the various block rows 3120 of the in diagonal portion of the local (possibly different for each block 3121 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3122 set it even if it is zero. 3123 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3124 submatrix (same for all local rows). 3125 - o_nnz - array containing the number of nonzeros in the various block rows of the 3126 off-diagonal portion of the local submatrix (possibly different for 3127 each block row) or NULL. 
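   A minimal call sequence is sketched below (a sketch only, not a complete program; comm, m and n are assumed
   to be supplied by the caller, and the block counts 3 and 2 are purely illustrative):
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,2,3,NULL,2,NULL);CHKERRQ(ierr);
.ve
   Here bs is 2 and each block row is given room for 3 blocks in the diagonal portion and 2 blocks in the
   off-diagonal portion of the local submatrix.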
3128
3129    If the *_nnz parameter is given then the *_nz parameter is ignored.
3130
3131    Options Database Keys:
3132 +   -mat_block_size - size of the blocks to use
3133 -   -mat_use_hash_table <fact> - use a hash table to save memory in constructing the matrix
3134
3135    Notes:
3136    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3137    then it must be used on all processors that share the object for that argument.
3138
3139    Storage Information:
3140    For a square global matrix we define each processor's diagonal portion
3141    to be its local rows and the corresponding columns (a square submatrix);
3142    each processor's off-diagonal portion encompasses the remainder of the
3143    local matrix (a rectangular submatrix).
3144
3145    The user can specify preallocated storage for the diagonal part of
3146    the local submatrix with either d_nz or d_nnz (not both). Set
3147    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3148    memory allocation. Likewise, specify preallocated storage for the
3149    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3150
3151    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3152    the figure below we depict these three local rows and all columns (0-11).
3153
3154 .vb
3155            0 1 2 3 4 5 6 7 8 9 10 11
3156           --------------------------
3157    row 3  |o o o d d d o o o o o  o
3158    row 4  |o o o d d d o o o o o  o
3159    row 5  |o o o d d d o o o o o  o
3160           --------------------------
3161 .ve
3162
3163    Thus, any entries in the d locations are stored in the d (diagonal)
3164    submatrix, and any entries in the o locations are stored in the
3165    o (off-diagonal) submatrix. Note that the d and the o submatrices are
3166    stored simply in the MATSEQBAIJ format for compressed row storage.
3167
3168    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3169    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3170    In general, for PDE problems in which most nonzeros are near the diagonal,
3171    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3172    or you will get TERRIBLE performance; see the users' manual chapter on
3173    matrices.
3174
3175    You can call MatGetInfo() to get information on how effective the preallocation was;
3176    for example, the fields mallocs, nz_allocated, nz_used and nz_unneeded.
3177    You can also run with the option -info and look for messages with the string
3178    malloc in them to see if additional memory allocation was needed.
3179
3180    Level: intermediate
3181
3182 .keywords: matrix, block, aij, compressed row, sparse, parallel
3183
3184 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3185 @*/
3186 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3187 {
3188   PetscErrorCode ierr;
3189
3190   PetscFunctionBegin;
3191   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3192   PetscValidType(B,1);
3193   PetscValidLogicalCollectiveInt(B,bs,2);
3194   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3195   PetscFunctionReturn(0);
3196 }
3197
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatCreateBAIJ"
3200 /*@C
3201    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3202    (block compressed row).
   For good matrix assembly performance
3203    the user should preallocate the matrix storage by setting the parameters
3204    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3205    performance can be increased by more than a factor of 50.
3206
3207    Collective on MPI_Comm
3208
3209    Input Parameters:
3210 +  comm - MPI communicator
3211 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3212           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3213 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3214           This value should be the same as the local size used in creating the
3215           y vector for the matrix-vector product y = Ax.
3216 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3217           This value should be the same as the local size used in creating the
3218           x vector for the matrix-vector product y = Ax.
3219 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3220 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3221 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3222            submatrix (same for all local rows)
3223 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3224            of the diagonal portion of the local submatrix (possibly different for each block
3225            row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3226            and set it even if it is zero.
3227 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3228            submatrix (same for all local rows).
3229 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3230            off-diagonal portion of the local submatrix (possibly different for
3231            each block row) or NULL.
3232
3233    Output Parameter:
3234 .  A - the matrix
3235
3236    Options Database Keys:
3237 +   -mat_block_size - size of the blocks to use
3238 -   -mat_use_hash_table <fact> - use a hash table to save memory in constructing the matrix
3239
3240    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3241    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3242    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3243
3244    Notes:
3245    If the *_nnz parameter is given then the *_nz parameter is ignored.
3246
3247    A nonzero block is any block that has 1 or more nonzeros in it.
3248
3249    The user MUST specify either the local or global matrix dimensions
3250    (possibly both).
3251
3252    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3253    then it must be used on all processors that share the object for that argument.
3254
3255    Storage Information:
3256    For a square global matrix we define each processor's diagonal portion
3257    to be its local rows and the corresponding columns (a square submatrix);
3258    each processor's off-diagonal portion encompasses the remainder of the
3259    local matrix (a rectangular submatrix).
3260
3261    The user can specify preallocated storage for the diagonal part of
3262    the local submatrix with either d_nz or d_nnz (not both). Set
3263    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3264    memory allocation. Likewise, specify preallocated storage for the
3265    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
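   As a small sketch (illustrative only; comm, the global dimension nglobal and the Mat A are assumed to be
   supplied by the caller), a matrix with 3x3 blocks in which each block row needs one diagonal block and
   three off-diagonal blocks, as in the figure below, could be created with
.vb
     ierr = MatCreateBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,nglobal,nglobal,1,NULL,3,NULL,&A);CHKERRQ(ierr);
.ve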
3266 3267 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3268 the figure below we depict these three local rows and all columns (0-11). 3269 3270 .vb 3271 0 1 2 3 4 5 6 7 8 9 10 11 3272 -------------------------- 3273 row 3 |o o o d d d o o o o o o 3274 row 4 |o o o d d d o o o o o o 3275 row 5 |o o o d d d o o o o o o 3276 -------------------------- 3277 .ve 3278 3279 Thus, any entries in the d locations are stored in the d (diagonal) 3280 submatrix, and any entries in the o locations are stored in the 3281 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3282 stored simply in the MATSEQBAIJ format for compressed row storage. 3283 3284 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3285 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3286 In general, for PDE problems in which most nonzeros are near the diagonal, 3287 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3288 or you will get TERRIBLE performance; see the users' manual chapter on 3289 matrices. 3290 3291 Level: intermediate 3292 3293 .keywords: matrix, block, aij, compressed row, sparse, parallel 3294 3295 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3296 @*/ 3297 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3298 { 3299 PetscErrorCode ierr; 3300 PetscMPIInt size; 3301 3302 PetscFunctionBegin; 3303 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3304 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3305 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3306 if (size > 1) { 3307 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3308 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3309 } else { 3310 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3311 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3318 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3319 { 3320 Mat mat; 3321 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3322 PetscErrorCode ierr; 3323 PetscInt len=0; 3324 3325 PetscFunctionBegin; 3326 *newmat = 0; 3327 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3328 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3329 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3330 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3331 3332 mat->factortype = matin->factortype; 3333 mat->preallocated = PETSC_TRUE; 3334 mat->assembled = PETSC_TRUE; 3335 mat->insertmode = NOT_SET_VALUES; 3336 3337 a = (Mat_MPIBAIJ*)mat->data; 3338 mat->rmap->bs = matin->rmap->bs; 3339 a->bs2 = oldmat->bs2; 3340 a->mbs = oldmat->mbs; 3341 a->nbs = oldmat->nbs; 3342 a->Mbs = oldmat->Mbs; 3343 a->Nbs = oldmat->Nbs; 3344 3345 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3346 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3347 3348 a->size = oldmat->size; 3349 a->rank = oldmat->rank; 3350 a->donotstash = oldmat->donotstash; 3351 a->roworiented = oldmat->roworiented; 3352 a->rowindices = 0; 3353 a->rowvalues = 0; 3354 a->getrowactive = 
PETSC_FALSE; 3355 a->barray = 0; 3356 a->rstartbs = oldmat->rstartbs; 3357 a->rendbs = oldmat->rendbs; 3358 a->cstartbs = oldmat->cstartbs; 3359 a->cendbs = oldmat->cendbs; 3360 3361 /* hash table stuff */ 3362 a->ht = 0; 3363 a->hd = 0; 3364 a->ht_size = 0; 3365 a->ht_flag = oldmat->ht_flag; 3366 a->ht_fact = oldmat->ht_fact; 3367 a->ht_total_ct = 0; 3368 a->ht_insert_ct = 0; 3369 3370 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3371 if (oldmat->colmap) { 3372 #if defined(PETSC_USE_CTABLE) 3373 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3374 #else 3375 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 3376 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3377 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3378 #endif 3379 } else a->colmap = 0; 3380 3381 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3382 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3383 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3384 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3385 } else a->garray = 0; 3386 3387 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3388 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3389 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3390 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3391 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3392 3393 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3394 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3395 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3396 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3397 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3398 *newmat = mat; 3399 PetscFunctionReturn(0); 3400 } 3401 3402 #undef __FUNCT__ 3403 #define __FUNCT__ "MatLoad_MPIBAIJ" 3404 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3405 { 3406 PetscErrorCode ierr; 3407 int fd; 3408 PetscInt i,nz,j,rstart,rend; 3409 PetscScalar *vals,*buf; 3410 MPI_Comm comm; 3411 MPI_Status status; 3412 PetscMPIInt rank,size,maxnz; 3413 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3414 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3415 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3416 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3417 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3418 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3419 3420 PetscFunctionBegin; 3421 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3422 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3423 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3424 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3425 3426 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3427 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3428 if (!rank) { 3429 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3430 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3431 if 
(header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3432 } 3433 3434 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3435 3436 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3437 M = header[1]; N = header[2]; 3438 3439 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3440 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3441 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3442 3443 /* If global sizes are set, check if they are consistent with that given in the file */ 3444 if (sizesset) { 3445 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3446 } 3447 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3448 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3449 3450 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3451 3452 /* 3453 This code adds extra rows to make sure the number of rows is 3454 divisible by the blocksize 3455 */ 3456 Mbs = M/bs; 3457 extra_rows = bs - M + bs*Mbs; 3458 if (extra_rows == bs) extra_rows = 0; 3459 else Mbs++; 3460 if (extra_rows && !rank) { 3461 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3462 } 3463 3464 /* determine ownership of all rows */ 3465 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3466 mbs = Mbs/size + ((Mbs % size) > rank); 3467 m = mbs*bs; 3468 } else { /* User set */ 3469 m = newmat->rmap->n; 3470 mbs = m/bs; 3471 } 3472 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3473 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3474 3475 /* process 0 needs enough room for process with most rows */ 3476 if (!rank) { 3477 mmax = rowners[1]; 3478 for (i=2; i<=size; i++) { 3479 mmax = PetscMax(mmax,rowners[i]); 3480 } 3481 mmax*=bs; 3482 } else mmax = -1; /* unused, but compiler warns anyway */ 3483 3484 rowners[0] = 0; 3485 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3486 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3487 rstart = rowners[rank]; 3488 rend = rowners[rank+1]; 3489 3490 /* distribute row lengths to all processors */ 3491 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3492 if (!rank) { 3493 mend = m; 3494 if (size == 1) mend = mend - extra_rows; 3495 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3496 for (j=mend; j<m; j++) locrowlens[j] = 1; 3497 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3498 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3499 for (j=0; j<m; j++) { 3500 procsnz[0] += locrowlens[j]; 3501 } 3502 for (i=1; i<size; i++) { 3503 mend = browners[i+1] - browners[i]; 3504 if (i == size-1) mend = mend - extra_rows; 3505 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3506 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3507 /* calculate the number of nonzeros on each processor */ 3508 for (j=0; j<browners[i+1]-browners[i]; j++) { 3509 procsnz[i] += rowlengths[j]; 3510 } 3511 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3512 } 3513 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3514 } else { 3515 ierr = 
MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3516 } 3517 3518 if (!rank) { 3519 /* determine max buffer needed and allocate it */ 3520 maxnz = procsnz[0]; 3521 for (i=1; i<size; i++) { 3522 maxnz = PetscMax(maxnz,procsnz[i]); 3523 } 3524 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3525 3526 /* read in my part of the matrix column indices */ 3527 nz = procsnz[0]; 3528 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3529 mycols = ibuf; 3530 if (size == 1) nz -= extra_rows; 3531 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3532 if (size == 1) { 3533 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3534 } 3535 3536 /* read in every ones (except the last) and ship off */ 3537 for (i=1; i<size-1; i++) { 3538 nz = procsnz[i]; 3539 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3540 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3541 } 3542 /* read in the stuff for the last proc */ 3543 if (size != 1) { 3544 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3545 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3546 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3547 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3548 } 3549 ierr = PetscFree(cols);CHKERRQ(ierr); 3550 } else { 3551 /* determine buffer space needed for message */ 3552 nz = 0; 3553 for (i=0; i<m; i++) { 3554 nz += locrowlens[i]; 3555 } 3556 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3557 mycols = ibuf; 3558 /* receive message of column indices*/ 3559 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3560 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3561 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3562 } 3563 3564 /* loop over local rows, determining number of off diagonal entries */ 3565 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3566 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3567 rowcount = 0; nzcount = 0; 3568 for (i=0; i<mbs; i++) { 3569 dcount = 0; 3570 odcount = 0; 3571 for (j=0; j<bs; j++) { 3572 kmax = locrowlens[rowcount]; 3573 for (k=0; k<kmax; k++) { 3574 tmp = mycols[nzcount++]/bs; 3575 if (!mask[tmp]) { 3576 mask[tmp] = 1; 3577 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3578 else masked1[dcount++] = tmp; 3579 } 3580 } 3581 rowcount++; 3582 } 3583 3584 dlens[i] = dcount; 3585 odlens[i] = odcount; 3586 3587 /* zero out the mask elements we set */ 3588 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3589 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3590 } 3591 3592 3593 if (!sizesset) { 3594 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3595 } 3596 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3597 3598 if (!rank) { 3599 ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 3600 /* read in my part of the matrix numerical values */ 3601 nz = procsnz[0]; 3602 vals = buf; 3603 mycols = ibuf; 3604 if (size == 1) nz -= extra_rows; 3605 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3606 if (size == 1) { 3607 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3608 } 3609 3610 /* insert into matrix */ 3611 jj = rstart*bs; 3612 for (i=0; i<m; i++) { 3613 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3614 mycols += locrowlens[i]; 3615 vals += locrowlens[i]; 3616 jj++; 3617 } 3618 /* read in other processors 
(except the last one) and ship out */ 3619 for (i=1; i<size-1; i++) { 3620 nz = procsnz[i]; 3621 vals = buf; 3622 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3623 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3624 } 3625 /* the last proc */ 3626 if (size != 1) { 3627 nz = procsnz[i] - extra_rows; 3628 vals = buf; 3629 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3630 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3631 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3632 } 3633 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3634 } else { 3635 /* receive numeric values */ 3636 ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 3637 3638 /* receive message of values*/ 3639 vals = buf; 3640 mycols = ibuf; 3641 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3642 3643 /* insert into matrix */ 3644 jj = rstart*bs; 3645 for (i=0; i<m; i++) { 3646 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3647 mycols += locrowlens[i]; 3648 vals += locrowlens[i]; 3649 jj++; 3650 } 3651 } 3652 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3653 ierr = PetscFree(buf);CHKERRQ(ierr); 3654 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3655 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3656 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3657 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3658 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3659 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3660 PetscFunctionReturn(0); 3661 } 3662 3663 #undef __FUNCT__ 3664 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3665 /*@ 3666 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3667 3668 Input Parameters: 3669 . mat - the matrix 3670 . 
fact - factor used to compute the size of the hash table
3671
3672    Not Collective, each process can use a different factor
3673
3674    Level: advanced
3675
3676    Notes:
3677    This can also be set by the command line option: -mat_use_hash_table <fact>
3678
3679 .keywords: matrix, hashtable, factor, HT
3680
3681 .seealso: MatSetOption()
3682 @*/
3683 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3684 {
3685   PetscErrorCode ierr;
3686
3687   PetscFunctionBegin;
3688   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3689   PetscFunctionReturn(0);
3690 }
3691
3692 #undef __FUNCT__
3693 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3694 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3695 {
3696   Mat_MPIBAIJ *baij;
3697
3698   PetscFunctionBegin;
3699   baij          = (Mat_MPIBAIJ*)mat->data;
3700   baij->ht_fact = fact;
3701   PetscFunctionReturn(0);
3702 }
3703
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3706 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3707 {
3708   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3709
3710   PetscFunctionBegin;
3711   if (Ad)     *Ad     = a->A;
3712   if (Ao)     *Ao     = a->B;
3713   if (colmap) *colmap = a->garray;
3714   PetscFunctionReturn(0);
3715 }
3716
3717 /*
3718     Special version for direct calls from Fortran (to eliminate two function call overheads)
3719 */
3720 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3721 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3722 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3723 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3724 #endif
3725
3726 #undef __FUNCT__
3727 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3728 /*@C
3729   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3730
3731   Collective on Mat
3732
3733   Input Parameters:
3734 + mat    - the matrix
3735 . min    - number of input rows
3736 . im     - input rows
3737 . nin    - number of input columns
3738 . in     - input columns
3739 . v      - numerical values input
3740 - addvin - INSERT_VALUES or ADD_VALUES
3741
3742   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
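  As a sketch of the intended correspondence (illustrative only, written with the Fortran pointer arguments
  already dereferenced), a call to this routine has the same effect as the standard call
.vb
     ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve
  but avoids the two intermediate function calls mentioned above.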
3743 3744 Level: advanced 3745 3746 .seealso: MatSetValuesBlocked() 3747 @*/ 3748 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3749 { 3750 /* convert input arguments to C version */ 3751 Mat mat = *matin; 3752 PetscInt m = *min, n = *nin; 3753 InsertMode addv = *addvin; 3754 3755 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3756 const MatScalar *value; 3757 MatScalar *barray = baij->barray; 3758 PetscBool roworiented = baij->roworiented; 3759 PetscErrorCode ierr; 3760 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3761 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3762 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3763 3764 PetscFunctionBegin; 3765 /* tasks normally handled by MatSetValuesBlocked() */ 3766 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3767 #if defined(PETSC_USE_DEBUG) 3768 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3769 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3770 #endif 3771 if (mat->assembled) { 3772 mat->was_assembled = PETSC_TRUE; 3773 mat->assembled = PETSC_FALSE; 3774 } 3775 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3776 3777 3778 if (!barray) { 3779 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3780 baij->barray = barray; 3781 } 3782 3783 if (roworiented) stepval = (n-1)*bs; 3784 else stepval = (m-1)*bs; 3785 3786 for (i=0; i<m; i++) { 3787 if (im[i] < 0) continue; 3788 #if defined(PETSC_USE_DEBUG) 3789 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3790 #endif 3791 if (im[i] >= rstart && im[i] < rend) { 3792 row = im[i] - rstart; 3793 for (j=0; j<n; j++) { 3794 /* If NumCol = 1 then a copy is not required */ 3795 if ((roworiented) && (n == 1)) { 3796 barray = (MatScalar*)v + i*bs2; 3797 } else if ((!roworiented) && (m == 1)) { 3798 barray = (MatScalar*)v + j*bs2; 3799 } else { /* Here a copy is required */ 3800 if (roworiented) { 3801 value = v + i*(stepval+bs)*bs + j*bs; 3802 } else { 3803 value = v + j*(stepval+bs)*bs + i*bs; 3804 } 3805 for (ii=0; ii<bs; ii++,value+=stepval) { 3806 for (jj=0; jj<bs; jj++) { 3807 *barray++ = *value++; 3808 } 3809 } 3810 barray -=bs2; 3811 } 3812 3813 if (in[j] >= cstart && in[j] < cend) { 3814 col = in[j] - cstart; 3815 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3816 } else if (in[j] < 0) continue; 3817 #if defined(PETSC_USE_DEBUG) 3818 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3819 #endif 3820 else { 3821 if (mat->was_assembled) { 3822 if (!baij->colmap) { 3823 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3824 } 3825 3826 #if defined(PETSC_USE_DEBUG) 3827 #if defined(PETSC_USE_CTABLE) 3828 { PetscInt data; 3829 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3830 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3831 } 3832 #else 3833 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3834 #endif 3835 #endif 3836 #if defined(PETSC_USE_CTABLE) 3837 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3838 col = (col - 1)/bs; 3839 #else 3840 col = (baij->colmap[in[j]] - 1)/bs; 3841 
#endif 3842 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3843 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3844 col = in[j]; 3845 } 3846 } else col = in[j]; 3847 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3848 } 3849 } 3850 } else { 3851 if (!baij->donotstash) { 3852 if (roworiented) { 3853 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3854 } else { 3855 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3856 } 3857 } 3858 } 3859 } 3860 3861 /* task normally handled by MatSetValuesBlocked() */ 3862 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3863 PetscFunctionReturn(0); 3864 } 3865 3866 #undef __FUNCT__ 3867 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3868 /*@ 3869 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3870 CSR format the local rows. 3871 3872 Collective on MPI_Comm 3873 3874 Input Parameters: 3875 + comm - MPI communicator 3876 . bs - the block size, only a block size of 1 is supported 3877 . m - number of local rows (Cannot be PETSC_DECIDE) 3878 . n - This value should be the same as the local size used in creating the 3879 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3880 calculated if N is given) For square matrices n is almost always m. 3881 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3882 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3883 . i - row indices 3884 . j - column indices 3885 - a - matrix values 3886 3887 Output Parameter: 3888 . mat - the matrix 3889 3890 Level: intermediate 3891 3892 Notes: 3893 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3894 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3895 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3896 3897 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3898 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3899 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3900 with column-major ordering within blocks. 3901 3902 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
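       For example (an illustrative sketch with bs equal to 1), a process whose local rows contain the nonzeros
       (0,0)=1.0, (0,2)=2.0 and (1,1)=3.0 would pass
.vb
     i[] = {0,2,3},  j[] = {0,2,1},  a[] = {1.0,2.0,3.0}
.ve
       so that i[row] gives the offset of each local row's first entry in the j and a arrays.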
3903
3904 .keywords: matrix, aij, compressed row, sparse, parallel
3905
3906 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3907           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3908 @*/
3909 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3910 {
3911   PetscErrorCode ierr;
3912
3913   PetscFunctionBegin;
3914   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3915   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3916   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3917   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3918   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3919   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3920   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3921   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3922   PetscFunctionReturn(0);
3923 }
3924
3925 #undef __FUNCT__
3926 #define __FUNCT__ "MatCreateMPIBAIJConcatenateSeqBAIJSymbolic"
3927 PetscErrorCode MatCreateMPIBAIJConcatenateSeqBAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
3928 {
3929   PetscErrorCode ierr;
3930   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3931   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
3932   PetscInt       *indx,*bindx,rmax=a->rmax,j;
3933
3934
3935   PetscFunctionBegin;
3936   /* This routine will ONLY return MPIBAIJ type matrix */
3937   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3938   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3939   m    = m/bs; N = N/cbs;
3940   if (n == PETSC_DECIDE) {
3941     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3942   }
3943   /* Check sum(n) = N */
3944   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3945   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3946
3947   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3948   rstart -= m;
3949
3950   ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3951   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3952   for (i=0; i<m; i++) {
3953     ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3954     nnz  = nnz/bs;
3955     for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3956     ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3957     ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3958   }
3959   ierr = PetscFree(bindx);CHKERRQ(ierr);
3960
3961   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3962   ierr = MatSetSizes(*outmat,m*bs,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3963   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3964   ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
3965   ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3966   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3967   PetscFunctionReturn(0);
3968 }
3969
3970 #undef __FUNCT__
3971 #define __FUNCT__ "MatCreateMPIBAIJConcatenateSeqBAIJNumeric"
3972 PetscErrorCode MatCreateMPIBAIJConcatenateSeqBAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
3973 {
3974   PetscErrorCode ierr;
3975   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3976   PetscInt       *indx;
3977   PetscScalar    *values;
3978
3979   PetscFunctionBegin;
3980   ierr =
MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3981 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3982 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 3983 3984 for (i=0; i<m; i++) { 3985 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3986 Ii = i + rstart; 3987 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3988 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3989 } 3990 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3991 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3992 PetscFunctionReturn(0); 3993 } 3994 3995 #undef __FUNCT__ 3996 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ" 3997 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3998 { 3999 PetscErrorCode ierr; 4000 PetscMPIInt size; 4001 4002 PetscFunctionBegin; 4003 /* same as MatCreateMPIAIJConcatenateSeqAIJ() */ 4004 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4005 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4006 if (size == 1) { 4007 if (scall == MAT_INITIAL_MATRIX) { 4008 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4009 } else { 4010 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4011 } 4012 } else { 4013 if (scall == MAT_INITIAL_MATRIX) { 4014 ierr = MatCreateMPIBAIJConcatenateSeqBAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4015 } 4016 ierr = MatCreateMPIBAIJConcatenateSeqBAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4017 } 4018 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4019 PetscFunctionReturn(0); 4020 } 4021
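/*
   A minimal usage sketch for the concatenation helper above (illustrative only; Aseq is assumed to be a
   sequential BAIJ matrix held by every process of PETSC_COMM_WORLD):

      Mat Ampi;
      ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);

   Each process contributes the rows of its Aseq; when n is PETSC_DECIDE the local column sizes of the
   result are chosen by PetscSplitOwnership() in the symbolic phase above.
*/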