#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
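
/*
   Illustrative caller-side sketch (kept out of compilation): the store/retrieve
   pair above is typically used when a matrix with a fixed nonzero pattern is
   refilled many times, e.g. inside a nonlinear or time-stepping loop.  The
   helper name ExampleStoreRetrieve() is hypothetical; the option call follows
   the documented MatStoreValues() requirement.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat mat)
{
  PetscErrorCode ierr;

  /* freeze the nonzero pattern so values can be saved/restored in place */
  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  /* ... fill and assemble mat once ... */
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* save the assembled values */
  /* ... later, instead of recomputing unchanged entries ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the saved values */
  return 0;
}
#endif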

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
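
/*
   Sketch of how the colmap built above is consulted (it mirrors the lookup in
   MatSetValues_MPIBAIJ() below; shown only for exposition and excluded from
   compilation, and the function name ExampleColmapLookup() is hypothetical).
   colmap stores "local block offset + 1" so that 0 can mean "this global block
   column is not present in the off-diagonal part on this process".
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscInt       bs    = mat->rmap->bs,col;
  PetscErrorCode ierr;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
  col  = col - 1;                 /* stored as offset+1; 0 means absent */
#else
  col = baij->colmap[gcol/bs] - 1;
#endif
  if (col >= 0) col += gcol % bs; /* point column inside the local B part */
  *lcol = col;                    /* negative: block column not on this process */
  return 0;
}
#endif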

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
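
/*
   The two macros above implement the same scheme: narrow the search window on
   the sorted block-column list of a row with a few bisection steps, finish
   with a short linear scan, and on a miss shift the tail of the row up by one
   slot so it stays sorted.  A minimal standalone sketch of that "search then
   shift" step on a plain index array (illustration only, excluded from
   compilation; assumes cols[] has room for one more entry):
*/
#if 0
static void ExampleSortedInsert(PetscInt *cols,PetscInt *len,PetscInt bcol)
{
  PetscInt low = 0,high = *len,t,i,ii;

  while (high-low > 3) {          /* bisection narrows the window */
    t = (low+high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {      /* short linear scan finishes the search */
    if (cols[i] >= bcol) break;
  }
  if (i < *len && cols[i] == bcol) return;            /* already present */
  for (ii=*len-1; ii>=i; ii--) cols[ii+1] = cols[ii]; /* shift tail up */
  cols[i] = bcol;
  (*len)++;
}
#endif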

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij        = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented  = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig = mat->rmap->rstart;
  PetscInt       rend_orig   = mat->rmap->rend,cstart_orig = mat->cmap->rstart;
  PetscInt       cend_orig   = mat->cmap->rend,bs = mat->rmap->bs;

  /* Some variables required in the macro */
  Mat         A      = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar   *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2 = a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart = baij->rstartbs;
  PetscInt          rend = baij->rendbs,cstart = baij->cstartbs,stepval;
  PetscInt          cend = baij->cendbs,bs = mat->rmap->bs,bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
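
/*
   Caller-side sketch for the blocked insertion path above (illustration only,
   excluded from compilation; ExampleInsertBlock() is a hypothetical helper).
   With block size bs, MatSetValuesBlocked() takes *block* row/column indices
   and a dense bs*bs block of values whose layout follows the MAT_ROW_ORIENTED
   option handled above (row-major with the default row-oriented setting).
*/
#if 0
static PetscErrorCode ExampleInsertBlock(Mat mat,PetscInt brow,PetscInt bcol)
{
  PetscScalar    block[4] = {1.0,2.0,3.0,4.0}; /* bs = 2: one 2x2 block, row-major */
  PetscErrorCode ierr;

  ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif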

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig = mat->rmap->rstart;
  PetscInt       rend_orig   = mat->rmap->rend,Nbs = baij->Nbs;
  PetscInt       h1,key,size = baij->ht_size,bs = mat->rmap->bs,*HT = baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct = baij->ht_total_ct,insert_ct = baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
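
/*
   The HASH macro above is multiplicative (Fibonacci) hashing: multiply the key
   by the golden-ratio constant, keep the fractional part, and scale it to the
   table size; collisions are then resolved by the linear probing seen in the
   functions using it.  A minimal sketch of how a (block row, block column)
   pair maps to a slot (illustration only, excluded from compilation;
   ExampleHashSlot() is a hypothetical name):
*/
#if 0
static PetscInt ExampleHashSlot(PetscInt size,PetscInt browGlobal,PetscInt bcolGlobal,PetscInt Nbs)
{
  PetscInt  key = browGlobal*Nbs + bcolGlobal + 1; /* 1-based so 0 can mean "empty slot" */
  PetscReal tmp;

  return HASH(size,key,tmp);
}
#endif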

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart = baij->rstartbs;
  PetscInt          rend   = mat->rmap->rend,stepval,bs = mat->rmap->bs,bs2 = baij->bs2,nbs2 = n*bs2;
  PetscInt          h1,key,size = baij->ht_size,idx,*HT = baij->ht,Nbs = baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct = baij->ht_total_ct,insert_ct = baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2 = baij->bs2,bs = baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = baij->garray,cstart = baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B = baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz = a->nz+b->nz,h1,*ai = a->i,*aj = a->j,*bi = b->i,*bj = b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2 = baij->bs2,rstart = baij->rstartbs;
  PetscInt       cstart = baij->cstartbs,*garray = baij->garray,row,col,Nbs = baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct = 0,max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
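
/*
   Why the MPI_BOR reduction above detects mixed insert modes: INSERT_VALUES
   and ADD_VALUES have distinct bit patterns, so OR-ing the modes of all ranks
   leaves a unanimous mode unchanged, while any disagreement produces the
   combined pattern (ADD_VALUES|INSERT_VALUES) tested for above.  Minimal
   two-rank illustration (excluded from compilation; ExampleMixedModes() is a
   hypothetical name):
*/
#if 0
static PetscBool ExampleMixedModes(InsertMode a,InsertMode b)
{
  /* returns PETSC_TRUE exactly when one rank inserted and the other added */
  return ((a|b) == (ADD_VALUES|INSERT_VALUES)) ? PETSC_TRUE : PETSC_FALSE;
}
#endif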

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2 = baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2 = a->bs2,header[4],nz,rlen;
  PetscInt       *range = 0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per
     block row, since process 0 does not have the information needed to expand them
     for each row of a block row.  This does require more communication, but still
     no more than the communication needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
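
/*
   The master/worker exchanges above all follow the same PetscViewerFlowControl
   shape: rank 0 writes its own data, then drains each worker in turn, while
   the flow-control calls keep workers from flooding rank 0 with messages.  A
   stripped-down sketch of that pattern (illustration only, excluded from
   compilation; ExampleFlowControl() is a hypothetical name and the elided
   MPI_Recv()/MPI_Send() payloads are application specific):
*/
#if 0
static PetscErrorCode ExampleFlowControl(PetscViewer viewer,PetscMPIInt rank,PetscMPIInt size)
{
  PetscErrorCode ierr;
  PetscInt       i,message_count,flowcontrolcount;

  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    for (i=1; i<size; i++) { /* rank 0 handles one worker at a time */
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      /* ... MPI_Recv() rank i's data and PetscBinaryWrite() it here ... */
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    /* ... MPI_Send() this rank's data to rank 0 here ... */
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  return 0;
}
#endif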

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
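
/*
   Caller-side sketch for the layout checks in MatMult_MPIBAIJ() above: x must
   match the column layout of A and y its row layout (illustration only,
   excluded from compilation; ExampleMult() is a hypothetical name and
   MatGetVecs() is assumed to be the vector-creation routine of this PETSc
   version).
*/
#if 0
static PetscErrorCode ExampleMult(Mat A)
{
  Vec            x,y;
  PetscErrorCode ierr;

  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr); /* x matches A's columns, y its rows */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);      /* passes the size checks above */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  return 0;
}
#endif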

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
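
/*
   The products above all use the standard PETSc overlap pattern: start the
   scatter, do the purely local work while messages are in flight, then
   complete the scatter.  Generic shape (illustration only, excluded from
   compilation; ExampleOverlap() is a hypothetical name):
*/
#if 0
static PetscErrorCode ExampleOverlap(VecScatter ctx,Vec lvec,Vec y)
{
  PetscErrorCode ierr;

  ierr = VecScatterBegin(ctx,lvec,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); /* post communication */
  /* ... purely local computation goes here, overlapping the messages ... */
  ierr = VecScatterEnd(ctx,lvec,y,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);   /* complete it */
  return 0;
}
#endif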
isend[3] += info->memory; isend[4] += info->mallocs; 1553 1554 if (flag == MAT_LOCAL) { 1555 info->nz_used = isend[0]; 1556 info->nz_allocated = isend[1]; 1557 info->nz_unneeded = isend[2]; 1558 info->memory = isend[3]; 1559 info->mallocs = isend[4]; 1560 } else if (flag == MAT_GLOBAL_MAX) { 1561 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1562 1563 info->nz_used = irecv[0]; 1564 info->nz_allocated = irecv[1]; 1565 info->nz_unneeded = irecv[2]; 1566 info->memory = irecv[3]; 1567 info->mallocs = irecv[4]; 1568 } else if (flag == MAT_GLOBAL_SUM) { 1569 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1570 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1577 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1578 info->fill_ratio_needed = 0; 1579 info->factor_mallocs = 0; 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1586 { 1587 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1588 PetscErrorCode ierr; 1589 1590 PetscFunctionBegin; 1591 switch (op) { 1592 case MAT_NEW_NONZERO_LOCATIONS: 1593 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1594 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1595 case MAT_KEEP_NONZERO_PATTERN: 1596 case MAT_NEW_NONZERO_LOCATION_ERR: 1597 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1598 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1599 break; 1600 case MAT_ROW_ORIENTED: 1601 a->roworiented = flg; 1602 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_NEW_DIAGONALS: 1607 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1608 break; 1609 case MAT_IGNORE_OFF_PROC_ENTRIES: 1610 a->donotstash = flg; 1611 break; 1612 case MAT_USE_HASH_TABLE: 1613 a->ht_flag = flg; 1614 break; 1615 case MAT_SYMMETRIC: 1616 case MAT_STRUCTURALLY_SYMMETRIC: 1617 case MAT_HERMITIAN: 1618 case MAT_SYMMETRY_ETERNAL: 1619 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1620 break; 1621 default: 1622 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1630 { 1631 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1632 Mat_SeqBAIJ *Aloc; 1633 Mat B; 1634 PetscErrorCode ierr; 1635 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1636 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1637 MatScalar *a; 1638 1639 PetscFunctionBegin; 1640 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1641 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1642 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1643 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1644 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1645 /* Do not know preallocation information, but must set block size */ 1646 ierr = 
MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1647 } else { 1648 B = *matout; 1649 } 1650 1651 /* copy over the A part */ 1652 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1653 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1654 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1655 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = (baij->cstartbs+aj[j])*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 /* copy over the B part */ 1669 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1670 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1671 for (i=0; i<mbs; i++) { 1672 rvals[0] = bs*(baij->rstartbs + i); 1673 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1674 for (j=ai[i]; j<ai[i+1]; j++) { 1675 col = baij->garray[aj[j]]*bs; 1676 for (k=0; k<bs; k++) { 1677 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1678 col++; 1679 a += bs; 1680 } 1681 } 1682 } 1683 ierr = PetscFree(rvals);CHKERRQ(ierr); 1684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 1687 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1688 else { 1689 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1697 { 1698 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1699 Mat a = baij->A,b = baij->B; 1700 PetscErrorCode ierr; 1701 PetscInt s1,s2,s3; 1702 1703 PetscFunctionBegin; 1704 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1705 if (rr) { 1706 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1707 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1708 /* Overlap communication with computation. 
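     The forward scatter of rr into lvec is started here, the diagonal block
     is scaled while the messages are in flight, and the scatter is completed
     just before the off-diagonal block is right-scaled below.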
*/ 1709 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 } 1711 if (ll) { 1712 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1713 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1714 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1715 } 1716 /* scale the diagonal block */ 1717 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1718 1719 if (rr) { 1720 /* Do a scatter end and then right scale the off-diagonal block */ 1721 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1722 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1730 { 1731 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1732 PetscErrorCode ierr; 1733 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1734 PetscInt i,*owners = A->rmap->range; 1735 PetscInt *nprocs,j,idx,nsends,row; 1736 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1737 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1738 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1739 MPI_Comm comm; 1740 MPI_Request *send_waits,*recv_waits; 1741 MPI_Status recv_status,*send_status; 1742 const PetscScalar *xx; 1743 PetscScalar *bb; 1744 #if defined(PETSC_DEBUG) 1745 PetscBool found = PETSC_FALSE; 1746 #endif 1747 1748 PetscFunctionBegin; 1749 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1750 /* first count number of contributors to each processor */ 1751 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1752 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1753 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1754 j = 0; 1755 for (i=0; i<N; i++) { 1756 if (lastidx > (idx = rows[i])) j = 0; 1757 lastidx = idx; 1758 for (; j<size; j++) { 1759 if (idx >= owners[j] && idx < owners[j+1]) { 1760 nprocs[2*j]++; 1761 nprocs[2*j+1] = 1; 1762 owner[i] = j; 1763 #if defined(PETSC_DEBUG) 1764 found = PETSC_TRUE; 1765 #endif 1766 break; 1767 } 1768 } 1769 #if defined(PETSC_DEBUG) 1770 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1771 found = PETSC_FALSE; 1772 #endif 1773 } 1774 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 1775 1776 if (A->nooffproczerorows) { 1777 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1778 nrecvs = nsends; 1779 nmax = N; 1780 } else { 1781 /* inform other processors of number of messages and max length*/ 1782 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1783 } 1784 1785 /* post receives: */ 1786 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1787 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1788 for (i=0; i<nrecvs; i++) { 1789 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1790 } 1791 1792 /* do sends: 1793 1) starts[i] gives the starting index in svalues for stuff going to 1794 the ith processor 1795 */ 1796 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1797 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 
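  /*
     The rows to be zeroed are packed into svalues grouped by owning process:
     starts[] holds a prefix sum of the per-process counts stored in nprocs[],
     and each row index is dropped into its owner's segment, advancing that
     owner's offset. A minimal sketch of the same bucketing pattern, with
     illustrative names count/owner/buf standing in for nprocs/owner/svalues:

         starts[0] = 0;
         for (p=1; p<size; p++) starts[p] = starts[p-1] + count[p-1];
         for (k=0; k<N; k++)    buf[starts[owner[k]]++] = rows[k];

     Because the first pass advances starts[], it is rebuilt below before the
     sends are posted.
  */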
1798 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1799 starts[0] = 0; 1800 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1801 for (i=0; i<N; i++) { 1802 svalues[starts[owner[i]]++] = rows[i]; 1803 } 1804 1805 starts[0] = 0; 1806 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1807 count = 0; 1808 for (i=0; i<size; i++) { 1809 if (nprocs[2*i+1]) { 1810 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1811 } 1812 } 1813 ierr = PetscFree(starts);CHKERRQ(ierr); 1814 1815 base = owners[rank]; 1816 1817 /* wait on receives */ 1818 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1819 count = nrecvs; 1820 slen = 0; 1821 while (count) { 1822 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1823 /* unpack receives into our local space */ 1824 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1825 1826 source[imdex] = recv_status.MPI_SOURCE; 1827 lens[imdex] = n; 1828 slen += n; 1829 count--; 1830 } 1831 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1832 1833 /* move the data into the send scatter */ 1834 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1835 count = 0; 1836 for (i=0; i<nrecvs; i++) { 1837 values = rvalues + i*nmax; 1838 for (j=0; j<lens[i]; j++) { 1839 lrows[count++] = values[j] - base; 1840 } 1841 } 1842 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1843 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1844 ierr = PetscFree(owner);CHKERRQ(ierr); 1845 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1846 1847 /* fix right hand side if needed */ 1848 if (x && b) { 1849 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1850 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1851 for (i=0; i<slen; i++) { 1852 bb[lrows[i]] = diag*xx[lrows[i]]; 1853 } 1854 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1855 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1856 } 1857 1858 /* actually zap the local rows */ 1859 /* 1860 Zero the required rows. If the "diagonal block" of the matrix 1861 is square and the user wishes to set the diagonal we use separate 1862 code so that MatSetValues() is not called for each diagonal allocating 1863 new memory, thus calling lots of mallocs and slowing things down. 
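     When diag is zero the affected rows of both blocks are simply zeroed;
     otherwise the diagonal entries are written back after the zeroing.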
1864 1865 */ 1866 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1867 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1868 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1869 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1870 } else if (diag != 0.0) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1872 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1873 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1874 for (i=0; i<slen; i++) { 1875 row = lrows[i] + rstart_bs; 1876 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1877 } 1878 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1879 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 } else { 1881 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1882 } 1883 1884 ierr = PetscFree(lrows);CHKERRQ(ierr); 1885 1886 /* wait on sends */ 1887 if (nsends) { 1888 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1889 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1890 ierr = PetscFree(send_status);CHKERRQ(ierr); 1891 } 1892 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1893 ierr = PetscFree(svalues);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1900 { 1901 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatEqual_MPIBAIJ" 1913 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1914 { 1915 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1916 Mat a,b,c,d; 1917 PetscBool flg; 1918 PetscErrorCode ierr; 1919 1920 PetscFunctionBegin; 1921 a = matA->A; b = matA->B; 1922 c = matB->A; d = matB->B; 1923 1924 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1925 if (flg) { 1926 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1927 } 1928 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatCopy_MPIBAIJ" 1934 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1935 { 1936 PetscErrorCode ierr; 1937 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1938 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1939 1940 PetscFunctionBegin; 1941 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
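     In that case we fall back on MatCopy_Basic(), which copies entry by entry
     through the public MatGetRow()/MatSetValues() interface.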
*/ 1942 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1943 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1944 } else { 1945 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1946 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1953 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1954 { 1955 PetscErrorCode ierr; 1956 1957 PetscFunctionBegin; 1958 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1964 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1965 { 1966 PetscErrorCode ierr; 1967 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1968 PetscBLASInt bnz,one=1; 1969 Mat_SeqBAIJ *x,*y; 1970 1971 PetscFunctionBegin; 1972 if (str == SAME_NONZERO_PATTERN) { 1973 PetscScalar alpha = a; 1974 x = (Mat_SeqBAIJ*)xx->A->data; 1975 y = (Mat_SeqBAIJ*)yy->A->data; 1976 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1977 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1978 x = (Mat_SeqBAIJ*)xx->B->data; 1979 y = (Mat_SeqBAIJ*)yy->B->data; 1980 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1981 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1982 } else { 1983 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1990 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1991 { 1992 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1997 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2003 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2004 { 2005 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2006 PetscErrorCode ierr; 2007 2008 PetscFunctionBegin; 2009 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2010 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 #undef __FUNCT__ 2015 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2016 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2017 { 2018 PetscErrorCode ierr; 2019 IS iscol_local; 2020 PetscInt csize; 2021 2022 PetscFunctionBegin; 2023 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2024 if (call == MAT_REUSE_MATRIX) { 2025 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2026 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2027 } else { 2028 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2029 } 2030 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2031 if (call == MAT_INITIAL_MATRIX) { 2032 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2033 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2040 /* 2041 Not great since it makes two copies of the submatrix, 
first a SeqBAIJ matrix locally and then the end result by concatenating the
   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new,iscol_new;
  PetscBool      idflag,allrows,allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. It does not flag
     errors and might change the indices, hence buggy */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
  else allcols = PETSC_FALSE;

  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
  else allrows = PETSC_FALSE;

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
      Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
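    /*
       dlens[i] and olens[i] count, for local block row i, the block columns
       that fall inside and outside the owned block-column range [rstart,rend).
       For example (a hypothetical row): with [rstart,rend) = [4,8), block
       columns {1,5,9} give dlens[i] = 1 and olens[i] = 2.
    */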
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
      The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,rlocal_size,clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  ierr = ISDestroy(&growp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
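  /* keep only the locally owned slice rows[first..first+rlocal_size) of the inverted permutation */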
  ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
  PetscInt               nis = iscoloring->n,nctot,*cols;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->cmap->bmapping;
  PetscInt               ctype=c->ctype;

  PetscFunctionBegin;
  if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");

  if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
  else ltog = NULL;
  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);

  M         = mat->rmap->n/bs;
  cstart    = mat->cmap->rstart/bs;
  cend      = mat->cmap->rend/bs;
  c->M      = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
  c->N      = mat->cmap->N/bs;
  c->m      = mat->rmap->n/bs;
  c->rstart = mat->rmap->rstart/bs;

  c->ncolors = nis;
  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
  ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
  ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
  /* Allow access to data structures of local part of matrix */
  if (!baij->colmap) {
    ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
  }
  ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);

  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);

  for (i=0; i<nis; i++) {
    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);

    c->ncolumns[i] = n;
    if (n) {
      ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      c->columns[i] = 0;
    }

    if (ctype == IS_COLORING_GLOBAL) {
      /* Determine the total (parallel) number of columns of this color */
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
      ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);

      ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
      ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
      if (!nctot) {
        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
      }

      disp[0] = 0;
      for (j=1; j<size; j++) {
        disp[j] = disp[j-1] + ncolsonproc[j-1];
      }

      /* Get complete list of columns for color on each processor */
      ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
    } else if (ctype == IS_COLORING_GHOSTED) {
      /* Determine local number of columns of this color on this process, including ghost points */
      nctot = n;
      ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");

    /*
       Mark all rows affected by these columns
    */
    /* Temporary option to allow for debugging/testing */
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
    if (!flg) { /*-----------------------------------------------------------------------------*/
      /* crude, fast version */
      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
      /* loop over columns */
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0;
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] -
B_ci[colb]; 2384 } 2385 } 2386 /* loop over columns marking them in rowhit */ 2387 for (k=0; k<m; k++) { 2388 rowhit[*rows++] = col + 1; 2389 } 2390 } 2391 2392 /* count the number of hits */ 2393 nrows = 0; 2394 for (j=0; j<M; j++) { 2395 if (rowhit[j]) nrows++; 2396 } 2397 c->nrows[i] = nrows; 2398 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2399 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2400 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2401 nrows = 0; 2402 for (j=0; j<M; j++) { 2403 if (rowhit[j]) { 2404 c->rows[i][nrows] = j; 2405 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2406 nrows++; 2407 } 2408 } 2409 } else { /*-------------------------------------------------------------------------------*/ 2410 /* slow version, using rowhit as a linked list */ 2411 PetscInt currentcol,fm,mfm; 2412 rowhit[M] = M; 2413 nrows = 0; 2414 /* loop over columns*/ 2415 for (j=0; j<nctot; j++) { 2416 if (ctype == IS_COLORING_GHOSTED) { 2417 col = ltog[cols[j]]; 2418 } else { 2419 col = cols[j]; 2420 } 2421 if (col >= cstart && col < cend) { 2422 /* column is in diagonal block of matrix */ 2423 rows = A_cj + A_ci[col-cstart]; 2424 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2425 } else { 2426 #if defined(PETSC_USE_CTABLE) 2427 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2428 colb--; 2429 #else 2430 colb = baij->colmap[col] - 1; 2431 #endif 2432 if (colb == -1) { 2433 m = 0; 2434 } else { 2435 colb = colb/bs; 2436 rows = B_cj + B_ci[colb]; 2437 m = B_ci[colb+1] - B_ci[colb]; 2438 } 2439 } 2440 2441 /* loop over columns marking them in rowhit */ 2442 fm = M; /* fm points to first entry in linked list */ 2443 for (k=0; k<m; k++) { 2444 currentcol = *rows++; 2445 /* is it already in the list? 
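             rowhit[] threads a sorted singly linked list of the rows hit so
             far, with M serving as both list head and terminator; we advance
             until we reach or pass the candidate row.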
          */
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
        }
      }
      c->nrows[i] = nrows;
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm                           = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
     vscale will contain the "diagonal" on-process scalings followed by the off-process ones
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column is in "off-processor" part */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb  = colb/bs;
          colb += cend - cstart;
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping */
    PetscInt N = mat->cmap->N,nlocal,*gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1;
    ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
    for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];    /* global column index */

        c->vscaleforrow[k][l] = gtol[col];    /* local column index */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
  if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
  CHKMEMQ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create a sequential matrix holding the block nonzero structure (one entry per block, stored as SeqAIJ)
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in
upper diagonal portion */ 2617 while (m-- > 0) { 2618 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2619 } 2620 } 2621 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2622 2623 /*-------------------------------------------------------------------- 2624 Gather all column indices to all processors 2625 */ 2626 for (i=0; i<size; i++) { 2627 recvcounts[i] = 0; 2628 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2629 recvcounts[i] += lens[j]; 2630 } 2631 } 2632 displs[0] = 0; 2633 for (i=1; i<size; i++) { 2634 displs[i] = displs[i-1] + recvcounts[i-1]; 2635 } 2636 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2637 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2638 #else 2639 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2640 #endif 2641 /*-------------------------------------------------------------------- 2642 Assemble the matrix into useable form (note numerical values not yet set) 2643 */ 2644 /* set the b->ilen (length of each row) values */ 2645 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2646 /* set the b->i indices */ 2647 b->i[0] = 0; 2648 for (i=1; i<=A->rmap->N/bs; i++) { 2649 b->i[i] = b->i[i-1] + lens[i-1]; 2650 } 2651 ierr = PetscFree(lens);CHKERRQ(ierr); 2652 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2653 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2654 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2655 2656 if (A->symmetric) { 2657 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2658 } else if (A->hermitian) { 2659 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2660 } else if (A->structurally_symmetric) { 2661 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2662 } 2663 *newmat = B; 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatSOR_MPIBAIJ" 2669 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2670 { 2671 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2672 PetscErrorCode ierr; 2673 Vec bb1 = 0; 2674 2675 PetscFunctionBegin; 2676 if (flag == SOR_APPLY_UPPER) { 2677 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2678 PetscFunctionReturn(0); 2679 } 2680 2681 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2682 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2683 } 2684 2685 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2686 if (flag & SOR_ZERO_INITIAL_GUESS) { 2687 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2688 its--; 2689 } 2690 2691 while (its--) { 2692 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2693 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2694 2695 /* update rhs: bb1 = bb - B*x */ 2696 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2697 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2698 2699 /* local sweep */ 2700 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2701 } 2702 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2703 if (flag & SOR_ZERO_INITIAL_GUESS) { 2704 ierr = 
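        /* with a zero initial guess the first local sweep needs no ghost update: x is zero, so the off-diagonal product contributes nothing */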
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2705 its--; 2706 } 2707 while (its--) { 2708 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2709 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2710 2711 /* update rhs: bb1 = bb - B*x */ 2712 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2713 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2714 2715 /* local sweep */ 2716 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2717 } 2718 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2719 if (flag & SOR_ZERO_INITIAL_GUESS) { 2720 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2721 its--; 2722 } 2723 while (its--) { 2724 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2725 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2726 2727 /* update rhs: bb1 = bb - B*x */ 2728 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2729 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2730 2731 /* local sweep */ 2732 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2733 } 2734 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2735 2736 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2737 PetscFunctionReturn(0); 2738 } 2739 2740 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2741 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2744 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2745 { 2746 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 2755 /* -------------------------------------------------------------------*/ 2756 static struct _MatOps MatOps_Values = { 2757 MatSetValues_MPIBAIJ, 2758 MatGetRow_MPIBAIJ, 2759 MatRestoreRow_MPIBAIJ, 2760 MatMult_MPIBAIJ, 2761 /* 4*/ MatMultAdd_MPIBAIJ, 2762 MatMultTranspose_MPIBAIJ, 2763 MatMultTransposeAdd_MPIBAIJ, 2764 0, 2765 0, 2766 0, 2767 /*10*/ 0, 2768 0, 2769 0, 2770 MatSOR_MPIBAIJ, 2771 MatTranspose_MPIBAIJ, 2772 /*15*/ MatGetInfo_MPIBAIJ, 2773 MatEqual_MPIBAIJ, 2774 MatGetDiagonal_MPIBAIJ, 2775 MatDiagonalScale_MPIBAIJ, 2776 MatNorm_MPIBAIJ, 2777 /*20*/ MatAssemblyBegin_MPIBAIJ, 2778 MatAssemblyEnd_MPIBAIJ, 2779 MatSetOption_MPIBAIJ, 2780 MatZeroEntries_MPIBAIJ, 2781 /*24*/ MatZeroRows_MPIBAIJ, 2782 0, 2783 0, 2784 0, 2785 0, 2786 /*29*/ MatSetUp_MPIBAIJ, 2787 0, 2788 0, 2789 0, 2790 0, 2791 /*34*/ MatDuplicate_MPIBAIJ, 2792 0, 2793 0, 2794 0, 2795 0, 2796 /*39*/ MatAXPY_MPIBAIJ, 2797 MatGetSubMatrices_MPIBAIJ, 2798 MatIncreaseOverlap_MPIBAIJ, 2799 MatGetValues_MPIBAIJ, 2800 MatCopy_MPIBAIJ, 2801 /*44*/ 0, 2802 MatScale_MPIBAIJ, 2803 0, 2804 0, 2805 0, 2806 /*49*/ 0, 2807 0, 2808 0, 2809 0, 2810 0, 2811 /*54*/ MatFDColoringCreate_MPIBAIJ, 2812 0, 2813 MatSetUnfactored_MPIBAIJ, 2814 MatPermute_MPIBAIJ, 2815 MatSetValuesBlocked_MPIBAIJ, 2816 /*59*/ MatGetSubMatrix_MPIBAIJ, 2817 MatDestroy_MPIBAIJ, 2818 MatView_MPIBAIJ, 2819 0, 2820 0, 2821 /*64*/ 0, 2822 0, 2823 0, 2824 0, 2825 0, 2826 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2827 0, 2828 0, 2829 0, 2830 0, 2831 /*74*/ 0, 2832 
MatFDColoringApply_BAIJ, 2833 0, 2834 0, 2835 0, 2836 /*79*/ 0, 2837 0, 2838 0, 2839 0, 2840 MatLoad_MPIBAIJ, 2841 /*84*/ 0, 2842 0, 2843 0, 2844 0, 2845 0, 2846 /*89*/ 0, 2847 0, 2848 0, 2849 0, 2850 0, 2851 /*94*/ 0, 2852 0, 2853 0, 2854 0, 2855 0, 2856 /*99*/ 0, 2857 0, 2858 0, 2859 0, 2860 0, 2861 /*104*/0, 2862 MatRealPart_MPIBAIJ, 2863 MatImaginaryPart_MPIBAIJ, 2864 0, 2865 0, 2866 /*109*/0, 2867 0, 2868 0, 2869 0, 2870 0, 2871 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2872 0, 2873 MatGetGhosts_MPIBAIJ, 2874 0, 2875 0, 2876 /*119*/0, 2877 0, 2878 0, 2879 0, 2880 0, 2881 /*124*/0, 2882 0, 2883 MatInvertBlockDiagonal_MPIBAIJ 2884 }; 2885 2886 EXTERN_C_BEGIN 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2889 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2890 { 2891 PetscFunctionBegin; 2892 *a = ((Mat_MPIBAIJ*)A->data)->A; 2893 PetscFunctionReturn(0); 2894 } 2895 EXTERN_C_END 2896 2897 EXTERN_C_BEGIN 2898 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2899 EXTERN_C_END 2900 2901 EXTERN_C_BEGIN 2902 #undef __FUNCT__ 2903 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2904 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2905 { 2906 PetscInt m,rstart,cstart,cend; 2907 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2908 const PetscInt *JJ =0; 2909 PetscScalar *values=0; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2914 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2915 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2916 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2917 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2918 m = B->rmap->n/bs; 2919 rstart = B->rmap->rstart/bs; 2920 cstart = B->cmap->rstart/bs; 2921 cend = B->cmap->rend/bs; 2922 2923 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2924 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2925 for (i=0; i<m; i++) { 2926 nz = ii[i+1] - ii[i]; 2927 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2928 nz_max = PetscMax(nz_max,nz); 2929 JJ = jj + ii[i]; 2930 for (j=0; j<nz; j++) { 2931 if (*JJ >= cstart) break; 2932 JJ++; 2933 } 2934 d = 0; 2935 for (; j<nz; j++) { 2936 if (*JJ++ >= cend) break; 2937 d++; 2938 } 2939 d_nnz[i] = d; 2940 o_nnz[i] = nz - d; 2941 } 2942 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2943 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2944 2945 values = (PetscScalar*)V; 2946 if (!values) { 2947 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2948 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2949 } 2950 for (i=0; i<m; i++) { 2951 PetscInt row = i + rstart; 2952 PetscInt ncols = ii[i+1] - ii[i]; 2953 const PetscInt *icols = jj + ii[i]; 2954 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2955 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2956 } 2957 2958 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2959 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964 EXTERN_C_END 2965 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2968 /*@C 2969 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2970 (the default parallel PETSc format). 2971 2972 Collective on MPI_Comm 2973 2974 Input Parameters: 2975 + A - the matrix 2976 . bs - the block size 2977 . i - the indices into j for the start of each local row (starts with zero) 2978 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2979 - v - optional values in the matrix 2980 2981 Level: developer 2982 2983 .keywords: matrix, aij, compressed row, sparse, parallel 2984 2985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2986 @*/ 2987 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2988 { 2989 PetscErrorCode ierr; 2990 2991 PetscFunctionBegin; 2992 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2993 PetscValidType(B,1); 2994 PetscValidLogicalCollectiveInt(B,bs,2); 2995 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 EXTERN_C_BEGIN 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3002 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3003 { 3004 Mat_MPIBAIJ *b; 3005 PetscErrorCode ierr; 3006 PetscInt i; 3007 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3008 3009 PetscFunctionBegin; 3010 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3011 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3012 3013 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3014 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3015 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3016 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3017 3018 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3020 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3021 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3022 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3023 3024 if (d_nnz) { 3025 for (i=0; i<B->rmap->n/bs; i++) { 3026 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3027 } 3028 } 3029 if (o_nnz) { 3030 for (i=0; i<B->rmap->n/bs; i++) { 3031 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3032 } 3033 } 3034 3035 b = (Mat_MPIBAIJ*)B->data; 3036 b->bs2 = bs*bs; 3037 b->mbs = B->rmap->n/bs; 3038 b->nbs = B->cmap->n/bs; 3039 b->Mbs = B->rmap->N/bs; 3040 b->Nbs = B->cmap->N/bs; 3041 3042 for 
(i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
  if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
EXTERN_C_END


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  CHKMEMQ;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if this row of the matrix has a diagonal entry */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
    CHKMEMQ;
  }
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include <../src/mat/impls/aij/mpi/mpiaij.h>
EXTERN_C_BEGIN
PetscErrorCode

#include <../src/mat/impls/aij/mpi/mpiaij.h>
EXTERN_C_BEGIN
PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> is the hash table factor
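
   Example (a sketch of typical use; the names are illustrative):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);
.ve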

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ
M*/

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C","MatConvert_MPIBAIJ_MPISBAIJ",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C","MatConvert_MPIBAIJ_MPIBSTRM",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.
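
   Example usage (a sketch, not from the PETSc documentation; two local block
   rows per process, with illustrative per-block-row counts; m, n, M, N and bs
   stand for values chosen by the caller):
.vb
   PetscInt d_nnz[] = {2,1};
   PetscInt o_nnz[] = {1,0};
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve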

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> is the hash table factor

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o  o  o
   row 4  | o o o d d d o o o o  o  o
   row 5  | o o o d d d o o o o  o  o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> is the hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
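
   Example usage (a sketch, not from the PETSc documentation; the sizes shown
   are illustrative):
.vb
   Mat A;
   MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);
.ve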

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o  o  o
   row 4  | o o o d d d o o o o  o  o
   row 5  | o o o d d d o o o o  o  o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
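
/*
   Usage sketch for the loader below (an illustration, not code from this
   file). The file name is hypothetical; the option -matload_block_size <bs>
   selects the block size at load time.

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIBAIJ);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/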

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the column indices for all other processes (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for the other processes (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
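
/*
   Usage sketch for the hash-table assembly option handled below (an
   illustration, not code from this file). The factor 1.6 is an arbitrary
   illustrative value; the command line option -mat_use_hash_table <fact>
   has the same effect.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIBAIJ);
     MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
     MatMPIBAIJSetHashTableFactor(A,1.6);
*/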

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix hash table.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
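
   The call mirrors MatSetValuesBlocked(); a C sketch of the equivalent
   operation, inserting one bs-by-bs block with bs = 2 (the values shown are
   illustrative):
.vb
   PetscInt    row = 0, col = 0;
   PetscScalar block[4] = {1.0,2.0,3.0,4.0};
   MatSetValuesBlocked(mat,1,&row,1,&col,block,INSERT_VALUES);
.ve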

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
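
/*
   Usage sketch for MatCreateMPIBAIJWithArrays() (an illustration, not code
   from this file): one process, bs = 1, a 2x2 local matrix with three
   nonzeros; the arrays shown are illustrative.

     Mat         A;
     PetscInt    ii[] = {0,2,3};
     PetscInt    jj[] = {0,1,1};
     PetscScalar aa[] = {1.0,2.0,3.0};
     MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
*/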