1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++) { 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba=b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if ((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + (i*(stepval+bs) + j)*bs; 316 } else { 317 value = v + (j*(stepval+bs) + i)*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 320 for (jj=0; jj<bs; jj++) { 321 barray[jj] = value[jj]; 322 } 323 barray += bs; 324 } 325 barray -= bs2; 326 } 327 328 if (in[j] >= cstart && in[j] < cend) { 329 col = in[j] - cstart; 330 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 331 } 332 else if (in[j] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 335 #endif 336 else { 337 if (mat->was_assembled) { 338 if (!baij->colmap) { 339 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 340 } 341 342 #if defined(PETSC_USE_DEBUG) 343 #if defined (PETSC_USE_CTABLE) 344 { PetscInt data; 345 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 346 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 347 } 348 #else 349 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 350 #endif 351 #endif 352 #if defined (PETSC_USE_CTABLE) 353 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 354 col = (col - 1)/bs; 355 #else 356 col = (baij->colmap[in[j]] - 1)/bs; 357 #endif 358 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 359 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 360 col = in[j]; 361 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 362 } else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscBool roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; 413 else value = v[i+j*m]; 414 /* Look up PetscInto the Hash Table */ 415 key = (row/bs)*Nbs+(col/bs)+1; 416 h1 = HASH(size,key,tmp); 417 418 419 idx = h1; 420 #if defined(PETSC_USE_DEBUG) 421 insert_ct++; 422 total_ct++; 423 if (HT[idx] != key) { 424 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 425 if (idx == size) { 426 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 427 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 435 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 if (roworiented) { 480 stepval = (n-1)*bs; 481 } else { 482 stepval = (m-1)*bs; 483 } 484 for (i=0; i<m; i++) { 485 #if defined(PETSC_USE_DEBUG) 486 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 487 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 488 #endif 489 row = im[i]; 490 v_t = v + i*nbs2; 491 if (row >= rstart && row < rend) { 492 for (j=0; j<n; j++) { 493 col = in[j]; 494 495 /* Look up into the Hash Table */ 496 key = row*Nbs+col+1; 497 h1 = HASH(size,key,tmp); 498 499 idx = h1; 500 #if defined(PETSC_USE_DEBUG) 501 total_ct++; 502 insert_ct++; 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 508 } 509 } 510 #else 511 if (HT[idx] != key) { 512 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 513 if (idx == size) { 514 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 515 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 516 } 517 } 518 #endif 519 baij_a = HD[idx]; 520 if (roworiented) { 521 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 522 /* value = v + (i*(stepval+bs)+j)*bs; */ 523 value = v_t; 524 v_t += bs; 525 if (addv == ADD_VALUES) { 526 for (ii=0; ii<bs; ii++,value+=stepval) { 527 for (jj=ii; jj<bs2; jj+=bs) { 528 baij_a[jj] += *value++; 529 } 530 } 531 } else { 532 for (ii=0; ii<bs; ii++,value+=stepval) { 533 for (jj=ii; jj<bs2; jj+=bs) { 534 baij_a[jj] = *value++; 535 } 536 } 537 } 538 } else { 539 value = v + j*(stepval+bs)*bs + i*bs; 540 if (addv == ADD_VALUES) { 541 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 542 for (jj=0; jj<bs; jj++) { 543 baij_a[jj] += *value++; 544 } 545 } 546 } else { 547 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 548 for (jj=0; jj<bs; jj++) { 549 baij_a[jj] = *value++; 550 } 551 } 552 } 553 } 554 } 555 } else { 556 if (!baij->donotstash) { 557 if (roworiented) { 558 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 559 } else { 560 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 561 } 562 } 563 } 564 } 565 #if defined(PETSC_USE_DEBUG) 566 baij->ht_total_ct = total_ct; 567 baij->ht_insert_ct = insert_ct; 568 #endif 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatGetValues_MPIBAIJ" 574 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 575 { 576 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 577 PetscErrorCode ierr; 578 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 579 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 580 581 PetscFunctionBegin; 582 for (i=0; i<m; i++) { 583 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 584 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 585 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 586 row = idxm[i] - bsrstart; 587 for (j=0; j<n; j++) { 588 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 589 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 590 if (idxn[j] >= bscstart && idxn[j] < bscend) { 591 col = idxn[j] - bscstart; 592 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 593 } else { 594 if (!baij->colmap) { 595 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 596 } 597 #if defined (PETSC_USE_CTABLE) 598 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 599 data --; 600 #else 601 data = baij->colmap[idxn[j]/bs]-1; 602 #endif 603 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 604 else { 605 col = data + idxn[j]%bs; 606 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 607 } 608 } 609 } 610 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 611 } 612 PetscFunctionReturn(0); 613 } 614 615 #undef __FUNCT__ 616 #define __FUNCT__ "MatNorm_MPIBAIJ" 617 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 618 { 619 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 620 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 621 PetscErrorCode ierr; 622 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 623 PetscReal sum = 0.0; 624 MatScalar *v; 625 626 PetscFunctionBegin; 627 if (baij->size == 1) { 628 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 629 } else { 630 if (type == NORM_FROBENIUS) { 631 v = amat->a; 632 nz = amat->nz*bs2; 633 for (i=0; i<nz; i++) { 634 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 635 } 636 v = bmat->a; 637 nz = bmat->nz*bs2; 638 for (i=0; i<nz; i++) { 639 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 640 } 641 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 642 *nrm = PetscSqrtReal(*nrm); 643 } else if (type == NORM_1) { /* max column sum */ 644 PetscReal *tmp,*tmp2; 645 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 646 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 647 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 648 v = amat->a; jj = amat->j; 649 for (i=0; i<amat->nz; i++) { 650 for (j=0; j<bs; j++) { 651 col = bs*(cstart + *jj) + j; /* column index */ 652 for (row=0; row<bs; row++) { 653 tmp[col] += PetscAbsScalar(*v); v++; 654 } 655 } 656 jj++; 657 } 658 v = bmat->a; jj = bmat->j; 659 for (i=0; i<bmat->nz; i++) { 660 for (j=0; j<bs; j++) { 661 col = bs*garray[*jj] + j; 662 for (row=0; row<bs; row++) { 663 tmp[col] += PetscAbsScalar(*v); v++; 664 } 665 } 666 jj++; 667 } 668 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 669 *nrm = 0.0; 670 for (j=0; j<mat->cmap->N; j++) { 671 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 672 } 673 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 674 } else if (type == NORM_INFINITY) { /* max row sum */ 675 PetscReal *sums; 676 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 677 sum = 0.0; 678 for (j=0; j<amat->mbs; j++) { 679 for (row=0; row<bs; row++) sums[row] = 0.0; 680 v = amat->a + bs2*amat->i[j]; 681 nz = amat->i[j+1]-amat->i[j]; 682 for (i=0; i<nz; i++) { 683 for (col=0; col<bs; col++) { 684 for (row=0; row<bs; row++) { 685 sums[row] += PetscAbsScalar(*v); v++; 686 } 687 } 688 } 689 v = bmat->a + bs2*bmat->i[j]; 690 nz = bmat->i[j+1]-bmat->i[j]; 691 for (i=0; i<nz; i++) { 692 for (col=0; col<bs; col++) { 693 for (row=0; row<bs; row++) { 694 sums[row] += PetscAbsScalar(*v); v++; 695 } 696 } 697 } 698 for (row=0; row<bs; row++) { 699 if (sums[row] > sum) sum = sums[row]; 700 } 701 } 702 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 703 ierr = PetscFree(sums);CHKERRQ(ierr); 704 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 705 } 706 PetscFunctionReturn(0); 707 } 708 709 /* 710 Creates the hash table, and sets the table 711 This table is created only once. 712 If new entried need to be added to the matrix 713 then the hash table has to be destroyed and 714 recreated. 715 */ 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 718 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 719 { 720 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 721 Mat A = baij->A,B=baij->B; 722 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 723 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 724 PetscErrorCode ierr; 725 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 726 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 727 PetscInt *HT,key; 728 MatScalar **HD; 729 PetscReal tmp; 730 #if defined(PETSC_USE_INFO) 731 PetscInt ct=0,max=0; 732 #endif 733 734 PetscFunctionBegin; 735 if (baij->ht) PetscFunctionReturn(0); 736 737 baij->ht_size = (PetscInt)(factor*nz); 738 ht_size = baij->ht_size; 739 740 /* Allocate Memory for Hash Table */ 741 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 742 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 743 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 744 HD = baij->hd; 745 HT = baij->ht; 746 747 /* Loop Over A */ 748 for (i=0; i<a->mbs; i++) { 749 for (j=ai[i]; j<ai[i+1]; j++) { 750 row = i+rstart; 751 col = aj[j]+cstart; 752 753 key = row*Nbs + col + 1; 754 h1 = HASH(ht_size,key,tmp); 755 for (k=0; k<ht_size; k++) { 756 if (!HT[(h1+k)%ht_size]) { 757 HT[(h1+k)%ht_size] = key; 758 HD[(h1+k)%ht_size] = a->a + j*bs2; 759 break; 760 #if defined(PETSC_USE_INFO) 761 } else { 762 ct++; 763 #endif 764 } 765 } 766 #if defined(PETSC_USE_INFO) 767 if (k> max) max = k; 768 #endif 769 } 770 } 771 /* Loop Over B */ 772 for (i=0; i<b->mbs; i++) { 773 for (j=bi[i]; j<bi[i+1]; j++) { 774 row = i+rstart; 775 col = garray[bj[j]]; 776 key = row*Nbs + col + 1; 777 h1 = HASH(ht_size,key,tmp); 778 for (k=0; k<ht_size; k++) { 779 if (!HT[(h1+k)%ht_size]) { 780 HT[(h1+k)%ht_size] = key; 781 HD[(h1+k)%ht_size] = b->a + j*bs2; 782 break; 783 #if defined(PETSC_USE_INFO) 784 } else { 785 ct++; 786 #endif 787 } 788 } 789 #if defined(PETSC_USE_INFO) 790 if (k> max) max = k; 791 #endif 792 } 793 } 794 795 /* Print Summary */ 796 #if defined(PETSC_USE_INFO) 797 for (i=0,j=0; i<ht_size; i++) { 798 if (HT[i]) {j++;} 799 } 800 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 801 #endif 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 807 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 808 { 809 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 810 PetscErrorCode ierr; 811 PetscInt nstash,reallocs; 812 InsertMode addv; 813 814 PetscFunctionBegin; 815 if (baij->donotstash || mat->nooffprocentries) { 816 PetscFunctionReturn(0); 817 } 818 819 /* make sure all processors are either in INSERTMODE or ADDMODE */ 820 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 821 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 822 mat->insertmode = addv; /* in case this processor had no cache */ 823 824 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 825 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 826 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 827 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 828 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 829 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 835 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 836 { 837 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 838 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 839 PetscErrorCode ierr; 840 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 841 PetscInt *row,*col; 842 PetscBool r1,r2,r3,other_disassembled; 843 MatScalar *val; 844 InsertMode addv = mat->insertmode; 845 PetscMPIInt n; 846 847 PetscFunctionBegin; 848 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 849 if (!baij->donotstash && !mat->nooffprocentries) { 850 while (1) { 851 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 852 if (!flg) break; 853 854 for (i=0; i<n;) { 855 /* Now identify the consecutive vals belonging to the same row */ 856 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 857 if (j < n) ncols = j-i; 858 else ncols = n-i; 859 /* Now assemble all these values with a single function call */ 860 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 861 i = j; 862 } 863 } 864 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 865 /* Now process the block-stash. Since the values are stashed column-oriented, 866 set the roworiented flag to column oriented, and after MatSetValues() 867 restore the original flags */ 868 r1 = baij->roworiented; 869 r2 = a->roworiented; 870 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 871 baij->roworiented = PETSC_FALSE; 872 a->roworiented = PETSC_FALSE; 873 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 874 while (1) { 875 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 876 if (!flg) break; 877 878 for (i=0; i<n;) { 879 /* Now identify the consecutive vals belonging to the same row */ 880 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 881 if (j < n) ncols = j-i; 882 else ncols = n-i; 883 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 884 i = j; 885 } 886 } 887 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 888 baij->roworiented = r1; 889 a->roworiented = r2; 890 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 891 } 892 893 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 894 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 895 896 /* determine if any processor has disassembled, if so we must 897 also disassemble ourselfs, in order that we may reassemble. */ 898 /* 899 if nonzero structure of submatrix B cannot change then we know that 900 no processor disassembled thus we can skip this stuff 901 */ 902 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 903 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 904 if (mat->was_assembled && !other_disassembled) { 905 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 906 } 907 } 908 909 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 910 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 911 } 912 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 913 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 915 916 #if defined(PETSC_USE_INFO) 917 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 918 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 919 baij->ht_total_ct = 0; 920 baij->ht_insert_ct = 0; 921 } 922 #endif 923 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 924 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 925 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 926 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 927 } 928 929 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 930 baij->rowvalues = 0; 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 936 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 937 { 938 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 939 PetscErrorCode ierr; 940 PetscMPIInt size = baij->size,rank = baij->rank; 941 PetscInt bs = mat->rmap->bs; 942 PetscBool iascii,isdraw; 943 PetscViewer sviewer; 944 PetscViewerFormat format; 945 946 PetscFunctionBegin; 947 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 948 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 949 if (iascii) { 950 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 951 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 952 MatInfo info; 953 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 954 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 957 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 958 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 960 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 962 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 964 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 965 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } else if (format == PETSC_VIEWER_ASCII_INFO) { 968 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 971 PetscFunctionReturn(0); 972 } 973 } 974 975 if (isdraw) { 976 PetscDraw draw; 977 PetscBool isnull; 978 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 979 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 980 } 981 982 if (size == 1) { 983 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 984 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 985 } else { 986 /* assemble the entire matrix onto first processor. */ 987 Mat A; 988 Mat_SeqBAIJ *Aloc; 989 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 990 MatScalar *a; 991 992 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 993 /* Perhaps this should be the type of mat? */ 994 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 995 if (!rank) { 996 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 997 } else { 998 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 999 } 1000 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1001 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1002 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1003 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1004 1005 /* copy over the A part */ 1006 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1007 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1008 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1009 1010 for (i=0; i<mbs; i++) { 1011 rvals[0] = bs*(baij->rstartbs + i); 1012 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1013 for (j=ai[i]; j<ai[i+1]; j++) { 1014 col = (baij->cstartbs+aj[j])*bs; 1015 for (k=0; k<bs; k++) { 1016 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1017 col++; a += bs; 1018 } 1019 } 1020 } 1021 /* copy over the B part */ 1022 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1023 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = baij->garray[aj[j]]*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 ierr = PetscFree(rvals);CHKERRQ(ierr); 1036 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 /* 1039 Everyone has to call to draw the matrix since the graphics waits are 1040 synchronized across all processors that share the PetscDraw object 1041 */ 1042 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1043 if (!rank) { 1044 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1045 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1046 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1047 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1048 } 1049 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1050 ierr = MatDestroy(&A);CHKERRQ(ierr); 1051 } 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1057 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1058 { 1059 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1060 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1061 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1062 PetscErrorCode ierr; 1063 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1064 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1065 int fd; 1066 PetscScalar *column_values; 1067 FILE *file; 1068 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1069 PetscInt message_count,flowcontrolcount; 1070 1071 PetscFunctionBegin; 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1074 nz = bs2*(A->nz + B->nz); 1075 rlen = mat->rmap->n; 1076 if (!rank) { 1077 header[0] = MAT_FILE_CLASSID; 1078 header[1] = mat->rmap->N; 1079 header[2] = mat->cmap->N; 1080 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1081 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1082 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1083 /* get largest number of rows any processor has */ 1084 range = mat->rmap->range; 1085 for (i=1; i<size; i++) { 1086 rlen = PetscMax(rlen,range[i+1] - range[i]); 1087 } 1088 } else { 1089 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1090 } 1091 1092 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1093 /* compute lengths of each row */ 1094 for (i=0; i<a->mbs; i++) { 1095 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1096 } 1097 /* store the row lengths to the file */ 1098 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1099 if (!rank) { 1100 MPI_Status status; 1101 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1102 rlen = (range[1] - range[0])/bs; 1103 for (i=0; i<rlen; i++) { 1104 for (j=0; j<bs; j++) { 1105 row_lens[i*bs+j] = bs*crow_lens[i]; 1106 } 1107 } 1108 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1109 for (i=1; i<size; i++) { 1110 rlen = (range[i+1] - range[i])/bs; 1111 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1112 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1113 for (k=0; k<rlen; k++) { 1114 for (j=0; j<bs; j++) { 1115 row_lens[k*bs+j] = bs*crow_lens[k]; 1116 } 1117 } 1118 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1119 } 1120 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1121 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1122 } else { 1123 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1124 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1125 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1126 } 1127 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1128 1129 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1130 information needed to make it for each row from a block row. This does require more communication but still not more than 1131 the communication needed for the nonzero values */ 1132 nzmax = nz; /* space a largest processor needs */ 1133 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1134 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1135 cnt = 0; 1136 for (i=0; i<a->mbs; i++) { 1137 pcnt = cnt; 1138 for (j=B->i[i]; j<B->i[i+1]; j++) { 1139 if ((col = garray[B->j[j]]) > cstart) break; 1140 for (l=0; l<bs; l++) { 1141 column_indices[cnt++] = bs*col+l; 1142 } 1143 } 1144 for (k=A->i[i]; k<A->i[i+1]; k++) { 1145 for (l=0; l<bs; l++) { 1146 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1147 } 1148 } 1149 for (; j<B->i[i+1]; j++) { 1150 for (l=0; l<bs; l++) { 1151 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1152 } 1153 } 1154 len = cnt - pcnt; 1155 for (k=1; k<bs; k++) { 1156 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1157 cnt += len; 1158 } 1159 } 1160 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1161 1162 /* store the columns to the file */ 1163 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1164 if (!rank) { 1165 MPI_Status status; 1166 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1167 for (i=1; i<size; i++) { 1168 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1169 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1170 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1171 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1172 } 1173 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1174 } else { 1175 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1176 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1177 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1178 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1179 } 1180 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1181 1182 /* load up the numerical values */ 1183 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1184 cnt = 0; 1185 for (i=0; i<a->mbs; i++) { 1186 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1187 for (j=B->i[i]; j<B->i[i+1]; j++) { 1188 if (garray[B->j[j]] > cstart) break; 1189 for (l=0; l<bs; l++) { 1190 for (ll=0; ll<bs; ll++) { 1191 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1192 } 1193 } 1194 cnt += bs; 1195 } 1196 for (k=A->i[i]; k<A->i[i+1]; k++) { 1197 for (l=0; l<bs; l++) { 1198 for (ll=0; ll<bs; ll++) { 1199 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1200 } 1201 } 1202 cnt += bs; 1203 } 1204 for (; j<B->i[i+1]; j++) { 1205 for (l=0; l<bs; l++) { 1206 for (ll=0; ll<bs; ll++) { 1207 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1208 } 1209 } 1210 cnt += bs; 1211 } 1212 cnt += (bs-1)*rlen; 1213 } 1214 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1215 1216 /* store the column values to the file */ 1217 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1218 if (!rank) { 1219 MPI_Status status; 1220 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1221 for (i=1; i<size; i++) { 1222 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1223 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1224 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1225 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1226 } 1227 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1228 } else { 1229 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1230 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1231 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(column_values);CHKERRQ(ierr); 1235 1236 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1237 if (file) { 1238 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1239 } 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatView_MPIBAIJ" 1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1246 { 1247 PetscErrorCode ierr; 1248 PetscBool iascii,isdraw,issocket,isbinary; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1252 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1255 if (iascii || isdraw || issocket) { 1256 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1257 } else if (isbinary) { 1258 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1259 } 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1266 { 1267 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 #if defined(PETSC_USE_LOG) 1272 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1273 #endif 1274 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1275 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1276 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1277 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1278 #if defined (PETSC_USE_CTABLE) 1279 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1280 #else 1281 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1282 #endif 1283 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1284 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1285 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1286 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1287 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1288 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1289 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1290 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1291 1292 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatMult_MPIBAIJ" 1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1308 { 1309 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt nt; 1312 1313 PetscFunctionBegin; 1314 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1315 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1316 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1317 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1318 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1320 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1328 { 1329 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1335 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1343 { 1344 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1345 PetscErrorCode ierr; 1346 PetscBool merged; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1350 /* do nondiagonal part */ 1351 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1352 if (!merged) { 1353 /* send it on its way */ 1354 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1355 /* do local part */ 1356 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1357 /* receive remote parts: note this assumes the values are not actually */ 1358 /* inserted in yy until the next line */ 1359 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1360 } else { 1361 /* do local part */ 1362 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1363 /* send it on its way */ 1364 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1365 /* values actually were received in the Begin() but we need to call this nop */ 1366 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1374 { 1375 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1376 PetscErrorCode ierr; 1377 1378 PetscFunctionBegin; 1379 /* do nondiagonal part */ 1380 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1381 /* send it on its way */ 1382 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 /* do local part */ 1384 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1385 /* receive remote parts: note this assumes the values are not actually */ 1386 /* inserted in yy until the next line, which is true for my implementation*/ 1387 /* but is not perhaps always true. */ 1388 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 /* 1393 This only works correctly for square matrices where the subblock A->A is the 1394 diagonal block 1395 */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1405 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatScale_MPIBAIJ" 1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1414 PetscErrorCode ierr; 1415 1416 PetscFunctionBegin; 1417 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1418 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1425 { 1426 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1427 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1428 PetscErrorCode ierr; 1429 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1430 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1431 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1432 1433 PetscFunctionBegin; 1434 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1435 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1436 mat->getrowactive = PETSC_TRUE; 1437 1438 if (!mat->rowvalues && (idx || v)) { 1439 /* 1440 allocate enough space to hold information from the longest row. 1441 */ 1442 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1443 PetscInt max = 1,mbs = mat->mbs,tmp; 1444 for (i=0; i<mbs; i++) { 1445 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1446 if (max < tmp) { max = tmp; } 1447 } 1448 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1449 } 1450 lrow = row - brstart; 1451 1452 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1453 if (!v) {pvA = 0; pvB = 0;} 1454 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1455 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1456 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1457 nztot = nzA + nzB; 1458 1459 cmap = mat->garray; 1460 if (v || idx) { 1461 if (nztot) { 1462 /* Sort by increasing column numbers, assuming A and B already sorted */ 1463 PetscInt imark = -1; 1464 if (v) { 1465 *v = v_p = mat->rowvalues; 1466 for (i=0; i<nzB; i++) { 1467 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1468 else break; 1469 } 1470 imark = i; 1471 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1472 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1473 } 1474 if (idx) { 1475 *idx = idx_p = mat->rowindices; 1476 if (imark > -1) { 1477 for (i=0; i<imark; i++) { 1478 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479 } 1480 } else { 1481 for (i=0; i<nzB; i++) { 1482 if (cmap[cworkB[i]/bs] < cstart) 1483 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1484 else break; 1485 } 1486 imark = i; 1487 } 1488 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1489 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1490 } 1491 } else { 1492 if (idx) *idx = 0; 1493 if (v) *v = 0; 1494 } 1495 } 1496 *nz = nztot; 1497 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1505 { 1506 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1507 1508 PetscFunctionBegin; 1509 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1510 baij->getrowactive = PETSC_FALSE; 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1517 { 1518 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1523 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1530 { 1531 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1532 Mat A = a->A,B = a->B; 1533 PetscErrorCode ierr; 1534 PetscReal isend[5],irecv[5]; 1535 1536 PetscFunctionBegin; 1537 info->block_size = (PetscReal)matin->rmap->bs; 1538 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1539 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1540 isend[3] = info->memory; isend[4] = info->mallocs; 1541 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1542 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1543 isend[3] += info->memory; isend[4] += info->mallocs; 1544 if (flag == MAT_LOCAL) { 1545 info->nz_used = isend[0]; 1546 info->nz_allocated = isend[1]; 1547 info->nz_unneeded = isend[2]; 1548 info->memory = isend[3]; 1549 info->mallocs = isend[4]; 1550 } else if (flag == MAT_GLOBAL_MAX) { 1551 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1552 info->nz_used = irecv[0]; 1553 info->nz_allocated = irecv[1]; 1554 info->nz_unneeded = irecv[2]; 1555 info->memory = irecv[3]; 1556 info->mallocs = irecv[4]; 1557 } else if (flag == MAT_GLOBAL_SUM) { 1558 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1565 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1566 info->fill_ratio_needed = 0; 1567 info->factor_mallocs = 0; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1574 { 1575 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1576 PetscErrorCode ierr; 1577 1578 PetscFunctionBegin; 1579 switch (op) { 1580 case MAT_NEW_NONZERO_LOCATIONS: 1581 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1582 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1583 case MAT_KEEP_NONZERO_PATTERN: 1584 case MAT_NEW_NONZERO_LOCATION_ERR: 1585 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1586 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1587 break; 1588 case MAT_ROW_ORIENTED: 1589 a->roworiented = flg; 1590 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1592 break; 1593 case MAT_NEW_DIAGONALS: 1594 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1595 break; 1596 case MAT_IGNORE_OFF_PROC_ENTRIES: 1597 a->donotstash = flg; 1598 break; 1599 case MAT_USE_HASH_TABLE: 1600 a->ht_flag = flg; 1601 break; 1602 case MAT_SYMMETRIC: 1603 case MAT_STRUCTURALLY_SYMMETRIC: 1604 case MAT_HERMITIAN: 1605 case MAT_SYMMETRY_ETERNAL: 1606 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1607 break; 1608 default: 1609 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1617 { 1618 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1619 Mat_SeqBAIJ *Aloc; 1620 Mat B; 1621 PetscErrorCode ierr; 1622 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1623 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1624 MatScalar *a; 1625 1626 PetscFunctionBegin; 1627 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1628 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1629 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1630 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1631 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1632 /* Do not know preallocation information, but must set block size */ 1633 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1634 } else { 1635 B = *matout; 1636 } 1637 1638 /* copy over the A part */ 1639 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1640 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1641 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1642 1643 for (i=0; i<mbs; i++) { 1644 rvals[0] = bs*(baij->rstartbs + i); 1645 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1646 for (j=ai[i]; j<ai[i+1]; j++) { 1647 col = (baij->cstartbs+aj[j])*bs; 1648 for (k=0; k<bs; k++) { 1649 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1650 col++; a += bs; 1651 } 1652 } 1653 } 1654 /* copy over the B part */ 1655 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1656 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1657 for (i=0; i<mbs; i++) { 1658 rvals[0] = bs*(baij->rstartbs + i); 1659 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1660 for (j=ai[i]; j<ai[i+1]; j++) { 1661 col = baij->garray[aj[j]]*bs; 1662 for (k=0; k<bs; k++) { 1663 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 ierr = PetscFree(rvals);CHKERRQ(ierr); 1669 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 1672 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1673 *matout = B; 1674 } else { 1675 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1676 } 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1682 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1683 { 1684 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1685 Mat a = baij->A,b = baij->B; 1686 PetscErrorCode ierr; 1687 PetscInt s1,s2,s3; 1688 1689 PetscFunctionBegin; 1690 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1691 if (rr) { 1692 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1693 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1694 /* Overlap communication with computation. */ 1695 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 } 1697 if (ll) { 1698 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1699 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1700 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1701 } 1702 /* scale the diagonal block */ 1703 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1704 1705 if (rr) { 1706 /* Do a scatter end and then right scale the off-diagonal block */ 1707 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1708 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1709 } 1710 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1716 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1717 { 1718 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1719 PetscErrorCode ierr; 1720 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1721 PetscInt i,*owners = A->rmap->range; 1722 PetscInt *nprocs,j,idx,nsends,row; 1723 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1724 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1725 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1726 MPI_Comm comm = ((PetscObject)A)->comm; 1727 MPI_Request *send_waits,*recv_waits; 1728 MPI_Status recv_status,*send_status; 1729 const PetscScalar *xx; 1730 PetscScalar *bb; 1731 #if defined(PETSC_DEBUG) 1732 PetscBool found = PETSC_FALSE; 1733 #endif 1734 1735 PetscFunctionBegin; 1736 /* first count number of contributors to each processor */ 1737 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1738 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1739 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1740 j = 0; 1741 for (i=0; i<N; i++) { 1742 if (lastidx > (idx = rows[i])) j = 0; 1743 lastidx = idx; 1744 for (; j<size; j++) { 1745 if (idx >= owners[j] && idx < owners[j+1]) { 1746 nprocs[2*j]++; 1747 nprocs[2*j+1] = 1; 1748 owner[i] = j; 1749 #if defined(PETSC_DEBUG) 1750 found = PETSC_TRUE; 1751 #endif 1752 break; 1753 } 1754 } 1755 #if defined(PETSC_DEBUG) 1756 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1757 found = PETSC_FALSE; 1758 #endif 1759 } 1760 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1761 1762 if (A->nooffproczerorows) { 1763 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1764 nrecvs = nsends; 1765 nmax = N; 1766 } else { 1767 /* inform other processors of number of messages and max length*/ 1768 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1769 } 1770 1771 /* post receives: */ 1772 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1773 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1774 for (i=0; i<nrecvs; i++) { 1775 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1776 } 1777 1778 /* do sends: 1779 1) starts[i] gives the starting index in svalues for stuff going to 1780 the ith processor 1781 */ 1782 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1783 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1784 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1785 starts[0] = 0; 1786 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1787 for (i=0; i<N; i++) { 1788 svalues[starts[owner[i]]++] = rows[i]; 1789 } 1790 1791 starts[0] = 0; 1792 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1793 count = 0; 1794 for (i=0; i<size; i++) { 1795 if (nprocs[2*i+1]) { 1796 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1797 } 1798 } 1799 ierr = PetscFree(starts);CHKERRQ(ierr); 1800 1801 base = owners[rank]; 1802 1803 /* wait on receives */ 1804 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1805 count = nrecvs; 1806 slen = 0; 1807 while (count) { 1808 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1809 /* unpack receives into our local space */ 1810 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1811 source[imdex] = recv_status.MPI_SOURCE; 1812 lens[imdex] = n; 1813 slen += n; 1814 count--; 1815 } 1816 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1817 1818 /* move the data into the send scatter */ 1819 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1820 count = 0; 1821 for (i=0; i<nrecvs; i++) { 1822 values = rvalues + i*nmax; 1823 for (j=0; j<lens[i]; j++) { 1824 lrows[count++] = values[j] - base; 1825 } 1826 } 1827 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1828 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1829 ierr = PetscFree(owner);CHKERRQ(ierr); 1830 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1831 1832 /* fix right hand side if needed */ 1833 if (x && b) { 1834 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1835 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1836 for (i=0; i<slen; i++) { 1837 bb[lrows[i]] = diag*xx[lrows[i]]; 1838 } 1839 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1840 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1841 } 1842 1843 /* actually zap the local rows */ 1844 /* 1845 Zero the required rows. If the "diagonal block" of the matrix 1846 is square and the user wishes to set the diagonal we use separate 1847 code so that MatSetValues() is not called for each diagonal allocating 1848 new memory, thus calling lots of mallocs and slowing things down. 1849 1850 */ 1851 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1852 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1853 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1854 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1855 } else if (diag != 0.0) { 1856 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1857 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1858 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1859 for (i=0; i<slen; i++) { 1860 row = lrows[i] + rstart_bs; 1861 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1862 } 1863 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1864 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1865 } else { 1866 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1867 } 1868 1869 ierr = PetscFree(lrows);CHKERRQ(ierr); 1870 1871 /* wait on sends */ 1872 if (nsends) { 1873 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1874 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1875 ierr = PetscFree(send_status);CHKERRQ(ierr); 1876 } 1877 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1878 ierr = PetscFree(svalues);CHKERRQ(ierr); 1879 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1885 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1886 { 1887 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1888 PetscErrorCode ierr; 1889 1890 PetscFunctionBegin; 1891 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatEqual_MPIBAIJ" 1899 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1900 { 1901 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1902 Mat a,b,c,d; 1903 PetscBool flg; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 a = matA->A; b = matA->B; 1908 c = matB->A; d = matB->B; 1909 1910 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1911 if (flg) { 1912 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1913 } 1914 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatCopy_MPIBAIJ" 1920 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1921 { 1922 PetscErrorCode ierr; 1923 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1924 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1925 1926 PetscFunctionBegin; 1927 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1928 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1929 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1930 } else { 1931 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1932 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1933 } 1934 PetscFunctionReturn(0); 1935 } 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1939 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1940 { 1941 PetscErrorCode ierr; 1942 1943 PetscFunctionBegin; 1944 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1950 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1951 { 1952 PetscErrorCode ierr; 1953 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1954 PetscBLASInt bnz,one=1; 1955 Mat_SeqBAIJ *x,*y; 1956 1957 PetscFunctionBegin; 1958 if (str == SAME_NONZERO_PATTERN) { 1959 PetscScalar alpha = a; 1960 x = (Mat_SeqBAIJ *)xx->A->data; 1961 y = (Mat_SeqBAIJ *)yy->A->data; 1962 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1963 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1964 x = (Mat_SeqBAIJ *)xx->B->data; 1965 y = (Mat_SeqBAIJ *)yy->B->data; 1966 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1967 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1968 } else { 1969 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1970 } 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1976 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1977 { 1978 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1983 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1989 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1990 { 1991 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1996 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2002 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2003 { 2004 PetscErrorCode ierr; 2005 IS iscol_local; 2006 PetscInt csize; 2007 2008 PetscFunctionBegin; 2009 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2010 if (call == MAT_REUSE_MATRIX) { 2011 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2012 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2013 } else { 2014 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2015 } 2016 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2017 if (call == MAT_INITIAL_MATRIX) { 2018 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2019 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2020 } 2021 PetscFunctionReturn(0); 2022 } 2023 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2026 /* 2027 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2028 in local and then by concatenating the local matrices the end result. 2029 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2030 */ 2031 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2032 { 2033 PetscErrorCode ierr; 2034 PetscMPIInt rank,size; 2035 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2036 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2037 Mat M,Mreuse; 2038 MatScalar *vwork,*aa; 2039 MPI_Comm comm = ((PetscObject)mat)->comm; 2040 IS isrow_new, iscol_new; 2041 PetscBool idflag,allrows, allcols; 2042 Mat_SeqBAIJ *aij; 2043 2044 PetscFunctionBegin; 2045 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2046 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2047 /* The compression and expansion should be avoided. Doesn't point 2048 out errors, might change the indices, hence buggey */ 2049 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2050 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2051 2052 /* Check for special case: each processor gets entire matrix columns */ 2053 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2054 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2055 if (idflag && ncol == mat->cmap->N) { 2056 allcols = PETSC_TRUE; 2057 } else { 2058 allcols = PETSC_FALSE; 2059 } 2060 2061 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2062 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2063 if (idflag && nrow == mat->rmap->N) { 2064 allrows = PETSC_TRUE; 2065 } else { 2066 allrows = PETSC_FALSE; 2067 } 2068 if (call == MAT_REUSE_MATRIX) { 2069 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2070 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2071 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2072 } else { 2073 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2074 } 2075 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2076 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2077 /* 2078 m - number of local rows 2079 n - number of columns (same on all processors) 2080 rstart - first row in new global matrix generated 2081 */ 2082 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2083 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2084 m = m/bs; 2085 n = n/bs; 2086 2087 if (call == MAT_INITIAL_MATRIX) { 2088 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2089 ii = aij->i; 2090 jj = aij->j; 2091 2092 /* 2093 Determine the number of non-zeros in the diagonal and off-diagonal 2094 portions of the matrix in order to do correct preallocation 2095 */ 2096 2097 /* first get start and end of "diagonal" columns */ 2098 if (csize == PETSC_DECIDE) { 2099 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2100 if (mglobal == n*bs) { /* square matrix */ 2101 nlocal = m; 2102 } else { 2103 nlocal = n/size + ((n % size) > rank); 2104 } 2105 } else { 2106 nlocal = csize/bs; 2107 } 2108 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2109 rstart = rend - nlocal; 2110 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2111 2112 /* next, compute all the lengths */ 2113 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2114 olens = dlens + m; 2115 for (i=0; i<m; i++) { 2116 jend = ii[i+1] - ii[i]; 2117 olen = 0; 2118 dlen = 0; 2119 for (j=0; j<jend; j++) { 2120 if (*jj < rstart || *jj >= rend) olen++; 2121 else dlen++; 2122 jj++; 2123 } 2124 olens[i] = olen; 2125 dlens[i] = dlen; 2126 } 2127 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2128 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2129 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2130 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2131 ierr = PetscFree(dlens);CHKERRQ(ierr); 2132 } else { 2133 PetscInt ml,nl; 2134 2135 M = *newmat; 2136 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2137 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2138 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2139 /* 2140 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2141 rather than the slower MatSetValues(). 2142 */ 2143 M->was_assembled = PETSC_TRUE; 2144 M->assembled = PETSC_FALSE; 2145 } 2146 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2147 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2148 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2149 ii = aij->i; 2150 jj = aij->j; 2151 aa = aij->a; 2152 for (i=0; i<m; i++) { 2153 row = rstart/bs + i; 2154 nz = ii[i+1] - ii[i]; 2155 cwork = jj; jj += nz; 2156 vwork = aa; aa += nz*bs*bs; 2157 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2158 } 2159 2160 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2161 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 *newmat = M; 2163 2164 /* save submatrix used in processor for next request */ 2165 if (call == MAT_INITIAL_MATRIX) { 2166 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2167 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2168 } 2169 2170 PetscFunctionReturn(0); 2171 } 2172 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "MatPermute_MPIBAIJ" 2175 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2176 { 2177 MPI_Comm comm,pcomm; 2178 PetscInt first,rlocal_size,clocal_size,nrows; 2179 const PetscInt *rows; 2180 PetscMPIInt size; 2181 IS crowp,growp,irowp,lrowp,lcolp; 2182 PetscErrorCode ierr; 2183 2184 PetscFunctionBegin; 2185 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2186 /* make a collective version of 'rowp' */ 2187 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2188 if (pcomm==comm) { 2189 crowp = rowp; 2190 } else { 2191 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2192 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2193 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2194 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2195 } 2196 /* collect the global row permutation and invert it */ 2197 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2198 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2199 if (pcomm!=comm) { 2200 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2201 } 2202 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2203 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2204 /* get the local target indices */ 2205 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2206 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2207 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2208 ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2209 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2210 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2211 /* the column permutation is so much easier; 2212 make a local version of 'colp' and invert it */ 2213 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2214 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2215 if (size==1) { 2216 lcolp = colp; 2217 } else { 2218 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2219 } 2220 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2221 /* now we just get the submatrix */ 2222 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2223 if (size>1) { 2224 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2225 } 2226 /* clean up */ 2227 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2233 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2234 { 2235 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2236 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2237 2238 PetscFunctionBegin; 2239 if (nghosts) { *nghosts = B->nbs;} 2240 if (ghosts) {*ghosts = baij->garray;} 2241 PetscFunctionReturn(0); 2242 } 2243 2244 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2248 /* 2249 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2250 */ 2251 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2252 { 2253 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2254 PetscErrorCode ierr; 2255 PetscMPIInt size,*ncolsonproc,*disp,nn; 2256 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2257 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj; 2258 PetscInt nis = iscoloring->n,nctot,*cols; 2259 PetscInt *rowhit,M,cstart,cend,colb; 2260 PetscInt *columnsforrow,l; 2261 IS *isa; 2262 PetscBool done,flg; 2263 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2264 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2265 2266 PetscFunctionBegin; 2267 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2268 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2269 2270 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2271 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2272 M = mat->rmap->n/bs; 2273 cstart = mat->cmap->rstart/bs; 2274 cend = mat->cmap->rend/bs; 2275 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2276 c->N = mat->cmap->N/bs; 2277 c->m = mat->rmap->n/bs; 2278 c->rstart = mat->rmap->rstart/bs; 2279 2280 c->ncolors = nis; 2281 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2282 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2283 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2284 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2285 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2286 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2287 2288 /* Allow access to data structures of local part of matrix */ 2289 if (!baij->colmap) { 2290 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2291 } 2292 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2293 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2294 2295 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2296 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2297 2298 for (i=0; i<nis; i++) { 2299 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2300 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2301 c->ncolumns[i] = n; 2302 if (n) { 2303 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2304 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2305 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2306 } else { 2307 c->columns[i] = 0; 2308 } 2309 2310 if (ctype == IS_COLORING_GLOBAL) { 2311 /* Determine the total (parallel) number of columns of this color */ 2312 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2313 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2314 2315 nn = PetscMPIIntCast(n); 2316 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2317 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2318 if (!nctot) { 2319 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2320 } 2321 2322 disp[0] = 0; 2323 for (j=1; j<size; j++) { 2324 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2325 } 2326 2327 /* Get complete list of columns for color on each processor */ 2328 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2329 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2330 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2331 } else if (ctype == IS_COLORING_GHOSTED) { 2332 /* Determine local number of columns of this color on this process, including ghost points */ 2333 nctot = n; 2334 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2335 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2336 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2337 2338 /* 2339 Mark all rows affect by these columns 2340 */ 2341 /* Temporary option to allow for debugging/testing */ 2342 flg = PETSC_FALSE; 2343 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2344 if (!flg) {/*-----------------------------------------------------------------------------*/ 2345 /* crude, fast version */ 2346 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2347 /* loop over columns*/ 2348 for (j=0; j<nctot; j++) { 2349 if (ctype == IS_COLORING_GHOSTED) { 2350 col = ltog[cols[j]]; 2351 } else { 2352 col = cols[j]; 2353 } 2354 if (col >= cstart && col < cend) { 2355 /* column is in diagonal block of matrix */ 2356 rows = A_cj + A_ci[col-cstart]; 2357 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2358 } else { 2359 #if defined (PETSC_USE_CTABLE) 2360 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2361 colb --; 2362 #else 2363 colb = baij->colmap[col] - 1; 2364 #endif 2365 if (colb == -1) { 2366 m = 0; 2367 } else { 2368 colb = colb/bs; 2369 rows = B_cj + B_ci[colb]; 2370 m = B_ci[colb+1] - B_ci[colb]; 2371 } 2372 } 2373 /* loop over columns marking them in rowhit */ 2374 for (k=0; k<m; k++) { 2375 rowhit[*rows++] = col + 1; 2376 } 2377 } 2378 2379 /* count the number of hits */ 2380 nrows = 0; 2381 for (j=0; j<M; j++) { 2382 if (rowhit[j]) nrows++; 2383 } 2384 c->nrows[i] = nrows; 2385 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2386 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2387 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2388 nrows = 0; 2389 for (j=0; j<M; j++) { 2390 if (rowhit[j]) { 2391 c->rows[i][nrows] = j; 2392 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2393 nrows++; 2394 } 2395 } 2396 } else {/*-------------------------------------------------------------------------------*/ 2397 /* slow version, using rowhit as a linked list */ 2398 PetscInt currentcol,fm,mfm; 2399 rowhit[M] = M; 2400 nrows = 0; 2401 /* loop over columns*/ 2402 for (j=0; j<nctot; j++) { 2403 if (ctype == IS_COLORING_GHOSTED) { 2404 col = ltog[cols[j]]; 2405 } else { 2406 col = cols[j]; 2407 } 2408 if (col >= cstart && col < cend) { 2409 /* column is in diagonal block of matrix */ 2410 rows = A_cj + A_ci[col-cstart]; 2411 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2412 } else { 2413 #if defined (PETSC_USE_CTABLE) 2414 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2415 colb --; 2416 #else 2417 colb = baij->colmap[col] - 1; 2418 #endif 2419 if (colb == -1) { 2420 m = 0; 2421 } else { 2422 colb = colb/bs; 2423 rows = B_cj + B_ci[colb]; 2424 m = B_ci[colb+1] - B_ci[colb]; 2425 } 2426 } 2427 2428 /* loop over columns marking them in rowhit */ 2429 fm = M; /* fm points to first entry in linked list */ 2430 for (k=0; k<m; k++) { 2431 currentcol = *rows++; 2432 /* is it already in the list? */ 2433 do { 2434 mfm = fm; 2435 fm = rowhit[fm]; 2436 } while (fm < currentcol); 2437 /* not in list so add it */ 2438 if (fm != currentcol) { 2439 nrows++; 2440 columnsforrow[currentcol] = col; 2441 /* next three lines insert new entry into linked list */ 2442 rowhit[mfm] = currentcol; 2443 rowhit[currentcol] = fm; 2444 fm = currentcol; 2445 /* fm points to present position in list since we know the columns are sorted */ 2446 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2447 } 2448 } 2449 c->nrows[i] = nrows; 2450 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2451 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2452 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2453 /* now store the linked list of rows into c->rows[i] */ 2454 nrows = 0; 2455 fm = rowhit[M]; 2456 do { 2457 c->rows[i][nrows] = fm; 2458 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2459 fm = rowhit[fm]; 2460 } while (fm < M); 2461 } /* ---------------------------------------------------------------------------------------*/ 2462 ierr = PetscFree(cols);CHKERRQ(ierr); 2463 } 2464 2465 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2466 /* 2467 vscale will contain the "diagonal" on processor scalings followed by the off processor 2468 */ 2469 if (ctype == IS_COLORING_GLOBAL) { 2470 PetscInt *garray; 2471 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2472 for (i=0; i<baij->B->cmap->n/bs; i++) { 2473 for (j=0; j<bs; j++) { 2474 garray[i*bs+j] = bs*baij->garray[i]+j; 2475 } 2476 } 2477 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2478 ierr = PetscFree(garray);CHKERRQ(ierr); 2479 CHKMEMQ; 2480 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2481 for (k=0; k<c->ncolors; k++) { 2482 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2483 for (l=0; l<c->nrows[k]; l++) { 2484 col = c->columnsforrow[k][l]; 2485 if (col >= cstart && col < cend) { 2486 /* column is in diagonal block of matrix */ 2487 colb = col - cstart; 2488 } else { 2489 /* column is in "off-processor" part */ 2490 #if defined (PETSC_USE_CTABLE) 2491 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2492 colb --; 2493 #else 2494 colb = baij->colmap[col] - 1; 2495 #endif 2496 colb = colb/bs; 2497 colb += cend - cstart; 2498 } 2499 c->vscaleforrow[k][l] = colb; 2500 } 2501 } 2502 } else if (ctype == IS_COLORING_GHOSTED) { 2503 /* Get gtol mapping */ 2504 PetscInt N = mat->cmap->N, *gtol; 2505 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2506 for (i=0; i<N; i++) gtol[i] = -1; 2507 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2508 2509 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2510 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2511 for (k=0; k<c->ncolors; k++) { 2512 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2513 for (l=0; l<c->nrows[k]; l++) { 2514 col = c->columnsforrow[k][l]; /* global column index */ 2515 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2516 } 2517 } 2518 ierr = PetscFree(gtol);CHKERRQ(ierr); 2519 } 2520 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2521 2522 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2523 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2524 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2525 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2526 CHKMEMQ; 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2532 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2533 { 2534 Mat B; 2535 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2536 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2537 Mat_SeqAIJ *b; 2538 PetscErrorCode ierr; 2539 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2540 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2541 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2542 2543 PetscFunctionBegin; 2544 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2545 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2546 2547 /* ---------------------------------------------------------------- 2548 Tell every processor the number of nonzeros per row 2549 */ 2550 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2551 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2552 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2553 } 2554 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2555 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2556 displs = recvcounts + size; 2557 for (i=0; i<size; i++) { 2558 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2559 displs[i] = A->rmap->range[i]/bs; 2560 } 2561 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2562 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2563 #else 2564 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2565 #endif 2566 /* --------------------------------------------------------------- 2567 Create the sequential matrix of the same type as the local block diagonal 2568 */ 2569 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2570 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2571 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2572 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2573 b = (Mat_SeqAIJ *)B->data; 2574 2575 /*-------------------------------------------------------------------- 2576 Copy my part of matrix column indices over 2577 */ 2578 sendcount = ad->nz + bd->nz; 2579 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2580 a_jsendbuf = ad->j; 2581 b_jsendbuf = bd->j; 2582 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2583 cnt = 0; 2584 for (i=0; i<n; i++) { 2585 2586 /* put in lower diagonal portion */ 2587 m = bd->i[i+1] - bd->i[i]; 2588 while (m > 0) { 2589 /* is it above diagonal (in bd (compressed) numbering) */ 2590 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2591 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2592 m--; 2593 } 2594 2595 /* put in diagonal portion */ 2596 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2597 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2598 } 2599 2600 /* put in upper diagonal portion */ 2601 while (m-- > 0) { 2602 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2603 } 2604 } 2605 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2606 2607 /*-------------------------------------------------------------------- 2608 Gather all column indices to all processors 2609 */ 2610 for (i=0; i<size; i++) { 2611 recvcounts[i] = 0; 2612 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2613 recvcounts[i] += lens[j]; 2614 } 2615 } 2616 displs[0] = 0; 2617 for (i=1; i<size; i++) { 2618 displs[i] = displs[i-1] + recvcounts[i-1]; 2619 } 2620 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2621 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2622 #else 2623 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2624 #endif 2625 /*-------------------------------------------------------------------- 2626 Assemble the matrix into useable form (note numerical values not yet set) 2627 */ 2628 /* set the b->ilen (length of each row) values */ 2629 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2630 /* set the b->i indices */ 2631 b->i[0] = 0; 2632 for (i=1; i<=A->rmap->N/bs; i++) { 2633 b->i[i] = b->i[i-1] + lens[i-1]; 2634 } 2635 ierr = PetscFree(lens);CHKERRQ(ierr); 2636 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2637 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2638 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2639 2640 if (A->symmetric) { 2641 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2642 } else if (A->hermitian) { 2643 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2644 } else if (A->structurally_symmetric) { 2645 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2646 } 2647 *newmat = B; 2648 PetscFunctionReturn(0); 2649 } 2650 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatSOR_MPIBAIJ" 2653 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2654 { 2655 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2656 PetscErrorCode ierr; 2657 Vec bb1 = 0; 2658 2659 PetscFunctionBegin; 2660 if (flag == SOR_APPLY_UPPER) { 2661 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2666 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2667 } 2668 2669 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2670 if (flag & SOR_ZERO_INITIAL_GUESS) { 2671 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2672 its--; 2673 } 2674 2675 while (its--) { 2676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2678 2679 /* update rhs: bb1 = bb - B*x */ 2680 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2681 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2682 2683 /* local sweep */ 2684 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2685 } 2686 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2687 if (flag & SOR_ZERO_INITIAL_GUESS) { 2688 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2689 its--; 2690 } 2691 while (its--) { 2692 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2693 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2694 2695 /* update rhs: bb1 = bb - B*x */ 2696 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2697 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2698 2699 /* local sweep */ 2700 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2701 } 2702 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2703 if (flag & SOR_ZERO_INITIAL_GUESS) { 2704 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2705 its--; 2706 } 2707 while (its--) { 2708 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2709 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2710 2711 /* update rhs: bb1 = bb - B*x */ 2712 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2713 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2714 2715 /* local sweep */ 2716 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2717 } 2718 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2719 2720 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2728 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2729 { 2730 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2731 PetscErrorCode ierr; 2732 2733 PetscFunctionBegin; 2734 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2735 PetscFunctionReturn(0); 2736 } 2737 2738 2739 /* -------------------------------------------------------------------*/ 2740 static struct _MatOps MatOps_Values = { 2741 MatSetValues_MPIBAIJ, 2742 MatGetRow_MPIBAIJ, 2743 MatRestoreRow_MPIBAIJ, 2744 MatMult_MPIBAIJ, 2745 /* 4*/ MatMultAdd_MPIBAIJ, 2746 MatMultTranspose_MPIBAIJ, 2747 MatMultTransposeAdd_MPIBAIJ, 2748 0, 2749 0, 2750 0, 2751 /*10*/ 0, 2752 0, 2753 0, 2754 MatSOR_MPIBAIJ, 2755 MatTranspose_MPIBAIJ, 2756 /*15*/ MatGetInfo_MPIBAIJ, 2757 MatEqual_MPIBAIJ, 2758 MatGetDiagonal_MPIBAIJ, 2759 MatDiagonalScale_MPIBAIJ, 2760 MatNorm_MPIBAIJ, 2761 /*20*/ MatAssemblyBegin_MPIBAIJ, 2762 MatAssemblyEnd_MPIBAIJ, 2763 MatSetOption_MPIBAIJ, 2764 MatZeroEntries_MPIBAIJ, 2765 /*24*/ MatZeroRows_MPIBAIJ, 2766 0, 2767 0, 2768 0, 2769 0, 2770 /*29*/ MatSetUp_MPIBAIJ, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*34*/ MatDuplicate_MPIBAIJ, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*39*/ MatAXPY_MPIBAIJ, 2781 MatGetSubMatrices_MPIBAIJ, 2782 MatIncreaseOverlap_MPIBAIJ, 2783 MatGetValues_MPIBAIJ, 2784 MatCopy_MPIBAIJ, 2785 /*44*/ 0, 2786 MatScale_MPIBAIJ, 2787 0, 2788 0, 2789 0, 2790 /*49*/ 0, 2791 0, 2792 0, 2793 0, 2794 0, 2795 /*54*/ MatFDColoringCreate_MPIBAIJ, 2796 0, 2797 MatSetUnfactored_MPIBAIJ, 2798 MatPermute_MPIBAIJ, 2799 MatSetValuesBlocked_MPIBAIJ, 2800 /*59*/ MatGetSubMatrix_MPIBAIJ, 2801 MatDestroy_MPIBAIJ, 2802 MatView_MPIBAIJ, 2803 0, 2804 0, 2805 /*64*/ 0, 2806 0, 2807 0, 2808 0, 2809 0, 2810 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2811 0, 2812 0, 2813 0, 2814 0, 2815 /*74*/ 0, 2816 MatFDColoringApply_BAIJ, 2817 0, 2818 0, 2819 0, 2820 /*79*/ 0, 2821 0, 2822 0, 2823 0, 2824 MatLoad_MPIBAIJ, 2825 /*84*/ 0, 2826 0, 2827 0, 2828 0, 2829 0, 2830 /*89*/ 0, 2831 0, 2832 0, 2833 0, 2834 0, 2835 /*94*/ 0, 2836 0, 2837 0, 2838 0, 2839 0, 2840 /*99*/ 0, 2841 0, 2842 0, 2843 0, 2844 0, 2845 /*104*/0, 2846 MatRealPart_MPIBAIJ, 2847 MatImaginaryPart_MPIBAIJ, 2848 0, 2849 0, 2850 /*109*/0, 2851 0, 2852 0, 2853 0, 2854 0, 2855 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2856 0, 2857 MatGetGhosts_MPIBAIJ, 2858 0, 2859 0, 2860 /*119*/0, 2861 0, 2862 0, 2863 0, 2864 0, 2865 /*124*/0, 2866 0, 2867 MatInvertBlockDiagonal_MPIBAIJ 2868 }; 2869 2870 EXTERN_C_BEGIN 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2873 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2874 { 2875 PetscFunctionBegin; 2876 *a = ((Mat_MPIBAIJ *)A->data)->A; 2877 PetscFunctionReturn(0); 2878 } 2879 EXTERN_C_END 2880 2881 EXTERN_C_BEGIN 2882 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2883 EXTERN_C_END 2884 2885 EXTERN_C_BEGIN 2886 #undef __FUNCT__ 2887 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2888 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2889 { 2890 PetscInt m,rstart,cstart,cend; 2891 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2892 const PetscInt *JJ=0; 2893 PetscScalar *values=0; 2894 PetscErrorCode ierr; 2895 2896 PetscFunctionBegin; 2897 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2898 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2899 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2900 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2901 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2902 m = B->rmap->n/bs; 2903 rstart = B->rmap->rstart/bs; 2904 cstart = B->cmap->rstart/bs; 2905 cend = B->cmap->rend/bs; 2906 2907 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2908 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2909 for (i=0; i<m; i++) { 2910 nz = ii[i+1] - ii[i]; 2911 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2912 nz_max = PetscMax(nz_max,nz); 2913 JJ = jj + ii[i]; 2914 for (j=0; j<nz; j++) { 2915 if (*JJ >= cstart) break; 2916 JJ++; 2917 } 2918 d = 0; 2919 for (; j<nz; j++) { 2920 if (*JJ++ >= cend) break; 2921 d++; 2922 } 2923 d_nnz[i] = d; 2924 o_nnz[i] = nz - d; 2925 } 2926 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2927 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2928 2929 values = (PetscScalar*)V; 2930 if (!values) { 2931 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2932 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2933 } 2934 for (i=0; i<m; i++) { 2935 PetscInt row = i + rstart; 2936 PetscInt ncols = ii[i+1] - ii[i]; 2937 const PetscInt *icols = jj + ii[i]; 2938 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2939 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2940 } 2941 2942 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2943 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 EXTERN_C_END 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2952 /*@C 2953 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2954 (the default parallel PETSc format). 2955 2956 Collective on MPI_Comm 2957 2958 Input Parameters: 2959 + A - the matrix 2960 . bs - the block size 2961 . i - the indices into j for the start of each local row (starts with zero) 2962 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2963 - v - optional values in the matrix 2964 2965 Level: developer 2966 2967 .keywords: matrix, aij, compressed row, sparse, parallel 2968 2969 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2970 @*/ 2971 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2972 { 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2977 PetscValidType(B,1); 2978 PetscValidLogicalCollectiveInt(B,bs,2); 2979 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 EXTERN_C_BEGIN 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2986 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2987 { 2988 Mat_MPIBAIJ *b; 2989 PetscErrorCode ierr; 2990 PetscInt i; 2991 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 2992 2993 PetscFunctionBegin; 2994 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 2995 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 2996 2997 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2998 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2999 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3000 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3001 3002 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3003 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3004 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3005 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3006 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3007 3008 if (d_nnz) { 3009 for (i=0; i<B->rmap->n/bs; i++) { 3010 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3011 } 3012 } 3013 if (o_nnz) { 3014 for (i=0; i<B->rmap->n/bs; i++) { 3015 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3016 } 3017 } 3018 3019 b = (Mat_MPIBAIJ*)B->data; 3020 b->bs2 = bs*bs; 3021 b->mbs = B->rmap->n/bs; 3022 b->nbs = B->cmap->n/bs; 3023 b->Mbs = B->rmap->N/bs; 3024 b->Nbs = B->cmap->N/bs; 3025 3026 for (i=0; i<=b->size; i++) { 3027 b->rangebs[i] = B->rmap->range[i]/bs; 3028 } 3029 b->rstartbs = B->rmap->rstart/bs; 3030 b->rendbs = B->rmap->rend/bs; 3031 b->cstartbs = B->cmap->rstart/bs; 3032 b->cendbs = B->cmap->rend/bs; 3033 3034 if (!B->preallocated) { 3035 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3036 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3037 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3038 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3039 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3040 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3041 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3042 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3043 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3044 } 3045 3046 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3047 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3048 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3049 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3050 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3051 B->preallocated = PETSC_TRUE; 3052 PetscFunctionReturn(0); 3053 } 3054 EXTERN_C_END 3055 3056 EXTERN_C_BEGIN 3057 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3058 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3059 EXTERN_C_END 3060 3061 3062 EXTERN_C_BEGIN 3063 #undef __FUNCT__ 3064 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3065 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3066 { 3067 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3068 PetscErrorCode ierr; 3069 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3070 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3071 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3072 3073 PetscFunctionBegin; 3074 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3075 ii[0] = 0; 3076 CHKMEMQ; 3077 for (i=0; i<M; i++) { 3078 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3079 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3080 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3081 /* remove one from count of matrix has diagonal */ 3082 for (j=id[i]; j<id[i+1]; j++) { 3083 if (jd[j] == i) {ii[i+1]--;break;} 3084 } 3085 CHKMEMQ; 3086 } 3087 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3088 cnt = 0; 3089 for (i=0; i<M; i++) { 3090 for (j=io[i]; j<io[i+1]; j++) { 3091 if (garray[jo[j]] > rstart) break; 3092 jj[cnt++] = garray[jo[j]]; 3093 CHKMEMQ; 3094 } 3095 for (k=id[i]; k<id[i+1]; k++) { 3096 if (jd[k] != i) { 3097 jj[cnt++] = rstart + jd[k]; 3098 CHKMEMQ; 3099 } 3100 } 3101 for (;j<io[i+1]; j++) { 3102 jj[cnt++] = garray[jo[j]]; 3103 CHKMEMQ; 3104 } 3105 } 3106 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3107 PetscFunctionReturn(0); 3108 } 3109 EXTERN_C_END 3110 3111 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3112 EXTERN_C_BEGIN 3113 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3114 EXTERN_C_END 3115 3116 EXTERN_C_BEGIN 3117 #undef __FUNCT__ 3118 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3119 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3120 { 3121 PetscErrorCode ierr; 3122 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3123 Mat B; 3124 Mat_MPIAIJ *b; 3125 3126 PetscFunctionBegin; 3127 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3128 3129 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3130 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3131 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3132 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3133 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3134 b = (Mat_MPIAIJ*) B->data; 3135 3136 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3137 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3138 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3139 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3140 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3141 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3142 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3143 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3144 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3145 if (reuse == MAT_REUSE_MATRIX) { 3146 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3147 } else { 3148 *newmat = B; 3149 } 3150 PetscFunctionReturn(0); 3151 } 3152 EXTERN_C_END 3153 3154 EXTERN_C_BEGIN 3155 #if defined(PETSC_HAVE_MUMPS) 3156 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3157 #endif 3158 EXTERN_C_END 3159 3160 /*MC 3161 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3162 3163 Options Database Keys: 3164 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3165 . -mat_block_size <bs> - set the blocksize used to store the matrix 3166 - -mat_use_hash_table <fact> 3167 3168 Level: beginner 3169 3170 .seealso: MatCreateMPIBAIJ 3171 M*/ 3172 3173 EXTERN_C_BEGIN 3174 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3175 EXTERN_C_END 3176 3177 EXTERN_C_BEGIN 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "MatCreate_MPIBAIJ" 3180 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3181 { 3182 Mat_MPIBAIJ *b; 3183 PetscErrorCode ierr; 3184 PetscBool flg; 3185 3186 PetscFunctionBegin; 3187 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3188 B->data = (void*)b; 3189 3190 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3191 B->assembled = PETSC_FALSE; 3192 3193 B->insertmode = NOT_SET_VALUES; 3194 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3195 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3196 3197 /* build local table of row and column ownerships */ 3198 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3199 3200 /* build cache for off array entries formed */ 3201 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3202 b->donotstash = PETSC_FALSE; 3203 b->colmap = PETSC_NULL; 3204 b->garray = PETSC_NULL; 3205 b->roworiented = PETSC_TRUE; 3206 3207 /* stuff used in block assembly */ 3208 b->barray = 0; 3209 3210 /* stuff used for matrix vector multiply */ 3211 b->lvec = 0; 3212 b->Mvctx = 0; 3213 3214 /* stuff for MatGetRow() */ 3215 b->rowindices = 0; 3216 b->rowvalues = 0; 3217 b->getrowactive = PETSC_FALSE; 3218 3219 /* hash table stuff */ 3220 b->ht = 0; 3221 b->hd = 0; 3222 b->ht_size = 0; 3223 b->ht_flag = PETSC_FALSE; 3224 b->ht_fact = 0; 3225 b->ht_total_ct = 0; 3226 b->ht_insert_ct = 0; 3227 3228 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3229 b->ijonly = PETSC_FALSE; 3230 3231 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3232 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3233 if (flg) { 3234 PetscReal fact = 1.39; 3235 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3236 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3237 if (fact <= 1.0) fact = 1.39; 3238 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3239 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3240 } 3241 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3242 3243 #if defined(PETSC_HAVE_MUMPS) 3244 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3245 #endif 3246 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3247 "MatConvert_MPIBAIJ_MPIAdj", 3248 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3249 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3250 "MatConvert_MPIBAIJ_MPIAIJ", 3251 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3252 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3253 "MatConvert_MPIBAIJ_MPISBAIJ", 3254 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3255 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3256 "MatStoreValues_MPIBAIJ", 3257 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3258 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3259 "MatRetrieveValues_MPIBAIJ", 3260 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3261 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3262 "MatGetDiagonalBlock_MPIBAIJ", 3263 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3265 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3266 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3268 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3269 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3271 "MatDiagonalScaleLocal_MPIBAIJ", 3272 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3274 "MatSetHashTableFactor_MPIBAIJ", 3275 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3277 "MatConvert_MPIBAIJ_MPIBSTRM", 3278 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3279 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3280 PetscFunctionReturn(0); 3281 } 3282 EXTERN_C_END 3283 3284 /*MC 3285 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3286 3287 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3288 and MATMPIBAIJ otherwise. 3289 3290 Options Database Keys: 3291 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3292 3293 Level: beginner 3294 3295 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3296 M*/ 3297 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3300 /*@C 3301 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3302 (block compressed row). For good matrix assembly performance 3303 the user should preallocate the matrix storage by setting the parameters 3304 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3305 performance can be increased by more than a factor of 50. 3306 3307 Collective on Mat 3308 3309 Input Parameters: 3310 + A - the matrix 3311 . bs - size of blockk 3312 . d_nz - number of block nonzeros per block row in diagonal portion of local 3313 submatrix (same for all local rows) 3314 . d_nnz - array containing the number of block nonzeros in the various block rows 3315 of the in diagonal portion of the local (possibly different for each block 3316 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3317 set it even if it is zero. 3318 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3319 submatrix (same for all local rows). 3320 - o_nnz - array containing the number of nonzeros in the various block rows of the 3321 off-diagonal portion of the local submatrix (possibly different for 3322 each block row) or PETSC_NULL. 3323 3324 If the *_nnz parameter is given then the *_nz parameter is ignored 3325 3326 Options Database Keys: 3327 + -mat_block_size - size of the blocks to use 3328 - -mat_use_hash_table <fact> 3329 3330 Notes: 3331 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3332 than it must be used on all processors that share the object for that argument. 3333 3334 Storage Information: 3335 For a square global matrix we define each processor's diagonal portion 3336 to be its local rows and the corresponding columns (a square submatrix); 3337 each processor's off-diagonal portion encompasses the remainder of the 3338 local matrix (a rectangular submatrix). 3339 3340 The user can specify preallocated storage for the diagonal part of 3341 the local submatrix with either d_nz or d_nnz (not both). Set 3342 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3343 memory allocation. Likewise, specify preallocated storage for the 3344 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3345 3346 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3347 the figure below we depict these three local rows and all columns (0-11). 3348 3349 .vb 3350 0 1 2 3 4 5 6 7 8 9 10 11 3351 ------------------- 3352 row 3 | o o o d d d o o o o o o 3353 row 4 | o o o d d d o o o o o o 3354 row 5 | o o o d d d o o o o o o 3355 ------------------- 3356 .ve 3357 3358 Thus, any entries in the d locations are stored in the d (diagonal) 3359 submatrix, and any entries in the o locations are stored in the 3360 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3361 stored simply in the MATSEQBAIJ format for compressed row storage. 3362 3363 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3364 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3365 In general, for PDE problems in which most nonzeros are near the diagonal, 3366 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3367 or you will get TERRIBLE performance; see the users' manual chapter on 3368 matrices. 3369 3370 You can call MatGetInfo() to get information on how effective the preallocation was; 3371 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3372 You can also run with the option -info and look for messages with the string 3373 malloc in them to see if additional memory allocation was needed. 3374 3375 Level: intermediate 3376 3377 .keywords: matrix, block, aij, compressed row, sparse, parallel 3378 3379 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3380 @*/ 3381 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3382 { 3383 PetscErrorCode ierr; 3384 3385 PetscFunctionBegin; 3386 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3387 PetscValidType(B,1); 3388 PetscValidLogicalCollectiveInt(B,bs,2); 3389 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3390 PetscFunctionReturn(0); 3391 } 3392 3393 #undef __FUNCT__ 3394 #define __FUNCT__ "MatCreateBAIJ" 3395 /*@C 3396 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3397 (block compressed row). For good matrix assembly performance 3398 the user should preallocate the matrix storage by setting the parameters 3399 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3400 performance can be increased by more than a factor of 50. 3401 3402 Collective on MPI_Comm 3403 3404 Input Parameters: 3405 + comm - MPI communicator 3406 . bs - size of blockk 3407 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3408 This value should be the same as the local size used in creating the 3409 y vector for the matrix-vector product y = Ax. 3410 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3411 This value should be the same as the local size used in creating the 3412 x vector for the matrix-vector product y = Ax. 3413 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3414 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3415 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3416 submatrix (same for all local rows) 3417 . d_nnz - array containing the number of nonzero blocks in the various block rows 3418 of the in diagonal portion of the local (possibly different for each block 3419 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3420 and set it even if it is zero. 3421 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3422 submatrix (same for all local rows). 3423 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3424 off-diagonal portion of the local submatrix (possibly different for 3425 each block row) or PETSC_NULL. 3426 3427 Output Parameter: 3428 . A - the matrix 3429 3430 Options Database Keys: 3431 + -mat_block_size - size of the blocks to use 3432 - -mat_use_hash_table <fact> 3433 3434 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3435 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3436 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3437 3438 Notes: 3439 If the *_nnz parameter is given then the *_nz parameter is ignored 3440 3441 A nonzero block is any block that as 1 or more nonzeros in it 3442 3443 The user MUST specify either the local or global matrix dimensions 3444 (possibly both). 3445 3446 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3447 than it must be used on all processors that share the object for that argument. 3448 3449 Storage Information: 3450 For a square global matrix we define each processor's diagonal portion 3451 to be its local rows and the corresponding columns (a square submatrix); 3452 each processor's off-diagonal portion encompasses the remainder of the 3453 local matrix (a rectangular submatrix). 3454 3455 The user can specify preallocated storage for the diagonal part of 3456 the local submatrix with either d_nz or d_nnz (not both). Set 3457 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3458 memory allocation. Likewise, specify preallocated storage for the 3459 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3460 3461 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3462 the figure below we depict these three local rows and all columns (0-11). 3463 3464 .vb 3465 0 1 2 3 4 5 6 7 8 9 10 11 3466 ------------------- 3467 row 3 | o o o d d d o o o o o o 3468 row 4 | o o o d d d o o o o o o 3469 row 5 | o o o d d d o o o o o o 3470 ------------------- 3471 .ve 3472 3473 Thus, any entries in the d locations are stored in the d (diagonal) 3474 submatrix, and any entries in the o locations are stored in the 3475 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3476 stored simply in the MATSEQBAIJ format for compressed row storage. 3477 3478 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3479 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3480 In general, for PDE problems in which most nonzeros are near the diagonal, 3481 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3482 or you will get TERRIBLE performance; see the users' manual chapter on 3483 matrices. 3484 3485 Level: intermediate 3486 3487 .keywords: matrix, block, aij, compressed row, sparse, parallel 3488 3489 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3490 @*/ 3491 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3492 { 3493 PetscErrorCode ierr; 3494 PetscMPIInt size; 3495 3496 PetscFunctionBegin; 3497 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3498 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3499 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3500 if (size > 1) { 3501 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3502 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3503 } else { 3504 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3505 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3506 } 3507 PetscFunctionReturn(0); 3508 } 3509 3510 #undef __FUNCT__ 3511 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3512 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3513 { 3514 Mat mat; 3515 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3516 PetscErrorCode ierr; 3517 PetscInt len=0; 3518 3519 PetscFunctionBegin; 3520 *newmat = 0; 3521 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3522 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3523 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3524 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3525 3526 mat->factortype = matin->factortype; 3527 mat->preallocated = PETSC_TRUE; 3528 mat->assembled = PETSC_TRUE; 3529 mat->insertmode = NOT_SET_VALUES; 3530 3531 a = (Mat_MPIBAIJ*)mat->data; 3532 mat->rmap->bs = matin->rmap->bs; 3533 a->bs2 = oldmat->bs2; 3534 a->mbs = oldmat->mbs; 3535 a->nbs = oldmat->nbs; 3536 a->Mbs = oldmat->Mbs; 3537 a->Nbs = oldmat->Nbs; 3538 3539 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3540 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3541 3542 a->size = oldmat->size; 3543 a->rank = oldmat->rank; 3544 a->donotstash = oldmat->donotstash; 3545 a->roworiented = oldmat->roworiented; 3546 a->rowindices = 0; 3547 a->rowvalues = 0; 3548 a->getrowactive = PETSC_FALSE; 3549 a->barray = 0; 3550 a->rstartbs = oldmat->rstartbs; 3551 a->rendbs = oldmat->rendbs; 3552 a->cstartbs = oldmat->cstartbs; 3553 a->cendbs = oldmat->cendbs; 3554 3555 /* hash table stuff */ 3556 a->ht = 0; 3557 a->hd = 0; 3558 a->ht_size = 0; 3559 a->ht_flag = oldmat->ht_flag; 3560 a->ht_fact = oldmat->ht_fact; 3561 a->ht_total_ct = 0; 3562 a->ht_insert_ct = 0; 3563 3564 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3565 if (oldmat->colmap) { 3566 #if defined (PETSC_USE_CTABLE) 3567 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3568 #else 3569 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3570 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3571 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3572 #endif 3573 } else a->colmap = 0; 3574 3575 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3576 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3577 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3578 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3579 } else a->garray = 0; 3580 3581 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3582 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3583 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3584 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3585 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3586 3587 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3588 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3589 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3590 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3591 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3592 *newmat = mat; 3593 3594 PetscFunctionReturn(0); 3595 } 3596 3597 #undef __FUNCT__ 3598 #define __FUNCT__ "MatLoad_MPIBAIJ" 3599 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3600 { 3601 PetscErrorCode ierr; 3602 int fd; 3603 PetscInt i,nz,j,rstart,rend; 3604 PetscScalar *vals,*buf; 3605 MPI_Comm comm = ((PetscObject)viewer)->comm; 3606 MPI_Status status; 3607 PetscMPIInt rank,size,maxnz; 3608 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3609 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3610 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3611 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3612 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3613 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3614 3615 PetscFunctionBegin; 3616 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3617 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3618 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3619 3620 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3622 if (!rank) { 3623 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3624 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3625 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3626 } 3627 3628 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3629 3630 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3631 M = header[1]; N = header[2]; 3632 3633 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3634 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3635 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3636 3637 /* If global sizes are set, check if they are consistent with that given in the file */ 3638 if (sizesset) { 3639 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3640 } 3641 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3642 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3643 3644 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3645 3646 /* 3647 This code adds extra rows to make sure the number of rows is 3648 divisible by the blocksize 3649 */ 3650 Mbs = M/bs; 3651 extra_rows = bs - M + bs*Mbs; 3652 if (extra_rows == bs) extra_rows = 0; 3653 else Mbs++; 3654 if (extra_rows && !rank) { 3655 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3656 } 3657 3658 /* determine ownership of all rows */ 3659 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3660 mbs = Mbs/size + ((Mbs % size) > rank); 3661 m = mbs*bs; 3662 } else { /* User set */ 3663 m = newmat->rmap->n; 3664 mbs = m/bs; 3665 } 3666 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3667 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3668 3669 /* process 0 needs enough room for process with most rows */ 3670 if (!rank) { 3671 mmax = rowners[1]; 3672 for (i=2; i<=size; i++) { 3673 mmax = PetscMax(mmax,rowners[i]); 3674 } 3675 mmax*=bs; 3676 } else mmax = m; 3677 3678 rowners[0] = 0; 3679 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3680 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3681 rstart = rowners[rank]; 3682 rend = rowners[rank+1]; 3683 3684 /* distribute row lengths to all processors */ 3685 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3686 if (!rank) { 3687 mend = m; 3688 if (size == 1) mend = mend - extra_rows; 3689 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3690 for (j=mend; j<m; j++) locrowlens[j] = 1; 3691 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3692 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3693 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3694 for (j=0; j<m; j++) { 3695 procsnz[0] += locrowlens[j]; 3696 } 3697 for (i=1; i<size; i++) { 3698 mend = browners[i+1] - browners[i]; 3699 if (i == size-1) mend = mend - extra_rows; 3700 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3701 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3702 /* calculate the number of nonzeros on each processor */ 3703 for (j=0; j<browners[i+1]-browners[i]; j++) { 3704 procsnz[i] += rowlengths[j]; 3705 } 3706 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3707 } 3708 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3709 } else { 3710 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3711 } 3712 3713 if (!rank) { 3714 /* determine max buffer needed and allocate it */ 3715 maxnz = procsnz[0]; 3716 for (i=1; i<size; i++) { 3717 maxnz = PetscMax(maxnz,procsnz[i]); 3718 } 3719 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3720 3721 /* read in my part of the matrix column indices */ 3722 nz = procsnz[0]; 3723 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3724 mycols = ibuf; 3725 if (size == 1) nz -= extra_rows; 3726 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3727 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3728 3729 /* read in every ones (except the last) and ship off */ 3730 for (i=1; i<size-1; i++) { 3731 nz = procsnz[i]; 3732 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3733 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3734 } 3735 /* read in the stuff for the last proc */ 3736 if (size != 1) { 3737 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3738 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3739 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3740 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3741 } 3742 ierr = PetscFree(cols);CHKERRQ(ierr); 3743 } else { 3744 /* determine buffer space needed for message */ 3745 nz = 0; 3746 for (i=0; i<m; i++) { 3747 nz += locrowlens[i]; 3748 } 3749 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3750 mycols = ibuf; 3751 /* receive message of column indices*/ 3752 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3753 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3754 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3755 } 3756 3757 /* loop over local rows, determining number of off diagonal entries */ 3758 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3759 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3760 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3761 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3762 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3763 rowcount = 0; nzcount = 0; 3764 for (i=0; i<mbs; i++) { 3765 dcount = 0; 3766 odcount = 0; 3767 for (j=0; j<bs; j++) { 3768 kmax = locrowlens[rowcount]; 3769 for (k=0; k<kmax; k++) { 3770 tmp = mycols[nzcount++]/bs; 3771 if (!mask[tmp]) { 3772 mask[tmp] = 1; 3773 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3774 else masked1[dcount++] = tmp; 3775 } 3776 } 3777 rowcount++; 3778 } 3779 3780 dlens[i] = dcount; 3781 odlens[i] = odcount; 3782 3783 /* zero out the mask elements we set */ 3784 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3785 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3786 } 3787 3788 3789 if (!sizesset) { 3790 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3791 } 3792 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3793 3794 if (!rank) { 3795 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3796 /* read in my part of the matrix numerical values */ 3797 nz = procsnz[0]; 3798 vals = buf; 3799 mycols = ibuf; 3800 if (size == 1) nz -= extra_rows; 3801 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3802 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3803 3804 /* insert into matrix */ 3805 jj = rstart*bs; 3806 for (i=0; i<m; i++) { 3807 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3808 mycols += locrowlens[i]; 3809 vals += locrowlens[i]; 3810 jj++; 3811 } 3812 /* read in other processors (except the last one) and ship out */ 3813 for (i=1; i<size-1; i++) { 3814 nz = procsnz[i]; 3815 vals = buf; 3816 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3817 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3818 } 3819 /* the last proc */ 3820 if (size != 1) { 3821 nz = procsnz[i] - extra_rows; 3822 vals = buf; 3823 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3824 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3825 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3826 } 3827 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3828 } else { 3829 /* receive numeric values */ 3830 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3831 3832 /* receive message of values*/ 3833 vals = buf; 3834 mycols = ibuf; 3835 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3836 3837 /* insert into matrix */ 3838 jj = rstart*bs; 3839 for (i=0; i<m; i++) { 3840 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3841 mycols += locrowlens[i]; 3842 vals += locrowlens[i]; 3843 jj++; 3844 } 3845 } 3846 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3847 ierr = PetscFree(buf);CHKERRQ(ierr); 3848 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3849 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3850 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3851 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3852 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3853 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3854 3855 PetscFunctionReturn(0); 3856 } 3857 3858 #undef __FUNCT__ 3859 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3860 /*@ 3861 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3862 3863 Input Parameters: 3864 . mat - the matrix 3865 . fact - factor 3866 3867 Not Collective, each process can use a different factor 3868 3869 Level: advanced 3870 3871 Notes: 3872 This can also be set by the command line option: -mat_use_hash_table <fact> 3873 3874 .keywords: matrix, hashtable, factor, HT 3875 3876 .seealso: MatSetOption() 3877 @*/ 3878 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3879 { 3880 PetscErrorCode ierr; 3881 3882 PetscFunctionBegin; 3883 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3884 PetscFunctionReturn(0); 3885 } 3886 3887 EXTERN_C_BEGIN 3888 #undef __FUNCT__ 3889 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3890 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3891 { 3892 Mat_MPIBAIJ *baij; 3893 3894 PetscFunctionBegin; 3895 baij = (Mat_MPIBAIJ*)mat->data; 3896 baij->ht_fact = fact; 3897 PetscFunctionReturn(0); 3898 } 3899 EXTERN_C_END 3900 3901 #undef __FUNCT__ 3902 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3903 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3904 { 3905 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3906 3907 PetscFunctionBegin; 3908 *Ad = a->A; 3909 *Ao = a->B; 3910 *colmap = a->garray; 3911 PetscFunctionReturn(0); 3912 } 3913 3914 /* 3915 Special version for direct calls from Fortran (to eliminate two function call overheads 3916 */ 3917 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3918 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3919 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3920 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3921 #endif 3922 3923 #undef __FUNCT__ 3924 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3925 /*@C 3926 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3927 3928 Collective on Mat 3929 3930 Input Parameters: 3931 + mat - the matrix 3932 . min - number of input rows 3933 . im - input rows 3934 . nin - number of input columns 3935 . in - input columns 3936 . v - numerical values input 3937 - addvin - INSERT_VALUES or ADD_VALUES 3938 3939 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3940 3941 Level: advanced 3942 3943 .seealso: MatSetValuesBlocked() 3944 @*/ 3945 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3946 { 3947 /* convert input arguments to C version */ 3948 Mat mat = *matin; 3949 PetscInt m = *min, n = *nin; 3950 InsertMode addv = *addvin; 3951 3952 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3953 const MatScalar *value; 3954 MatScalar *barray=baij->barray; 3955 PetscBool roworiented = baij->roworiented; 3956 PetscErrorCode ierr; 3957 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3958 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3959 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3960 3961 PetscFunctionBegin; 3962 /* tasks normally handled by MatSetValuesBlocked() */ 3963 if (mat->insertmode == NOT_SET_VALUES) { 3964 mat->insertmode = addv; 3965 } 3966 #if defined(PETSC_USE_DEBUG) 3967 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3968 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3969 #endif 3970 if (mat->assembled) { 3971 mat->was_assembled = PETSC_TRUE; 3972 mat->assembled = PETSC_FALSE; 3973 } 3974 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3975 3976 3977 if (!barray) { 3978 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3979 baij->barray = barray; 3980 } 3981 3982 if (roworiented) { 3983 stepval = (n-1)*bs; 3984 } else { 3985 stepval = (m-1)*bs; 3986 } 3987 for (i=0; i<m; i++) { 3988 if (im[i] < 0) continue; 3989 #if defined(PETSC_USE_DEBUG) 3990 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3991 #endif 3992 if (im[i] >= rstart && im[i] < rend) { 3993 row = im[i] - rstart; 3994 for (j=0; j<n; j++) { 3995 /* If NumCol = 1 then a copy is not required */ 3996 if ((roworiented) && (n == 1)) { 3997 barray = (MatScalar*)v + i*bs2; 3998 } else if ((!roworiented) && (m == 1)) { 3999 barray = (MatScalar*)v + j*bs2; 4000 } else { /* Here a copy is required */ 4001 if (roworiented) { 4002 value = v + i*(stepval+bs)*bs + j*bs; 4003 } else { 4004 value = v + j*(stepval+bs)*bs + i*bs; 4005 } 4006 for (ii=0; ii<bs; ii++,value+=stepval) { 4007 for (jj=0; jj<bs; jj++) { 4008 *barray++ = *value++; 4009 } 4010 } 4011 barray -=bs2; 4012 } 4013 4014 if (in[j] >= cstart && in[j] < cend) { 4015 col = in[j] - cstart; 4016 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4017 } 4018 else if (in[j] < 0) continue; 4019 #if defined(PETSC_USE_DEBUG) 4020 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4021 #endif 4022 else { 4023 if (mat->was_assembled) { 4024 if (!baij->colmap) { 4025 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4026 } 4027 4028 #if defined(PETSC_USE_DEBUG) 4029 #if defined (PETSC_USE_CTABLE) 4030 { PetscInt data; 4031 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4032 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4033 } 4034 #else 4035 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4036 #endif 4037 #endif 4038 #if defined (PETSC_USE_CTABLE) 4039 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4040 col = (col - 1)/bs; 4041 #else 4042 col = (baij->colmap[in[j]] - 1)/bs; 4043 #endif 4044 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4045 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4046 col = in[j]; 4047 } 4048 } 4049 else col = in[j]; 4050 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4051 } 4052 } 4053 } else { 4054 if (!baij->donotstash) { 4055 if (roworiented) { 4056 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4057 } else { 4058 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4059 } 4060 } 4061 } 4062 } 4063 4064 /* task normally handled by MatSetValuesBlocked() */ 4065 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4066 PetscFunctionReturn(0); 4067 } 4068 4069 #undef __FUNCT__ 4070 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4071 /*@ 4072 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4073 CSR format the local rows. 4074 4075 Collective on MPI_Comm 4076 4077 Input Parameters: 4078 + comm - MPI communicator 4079 . bs - the block size, only a block size of 1 is supported 4080 . m - number of local rows (Cannot be PETSC_DECIDE) 4081 . n - This value should be the same as the local size used in creating the 4082 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4083 calculated if N is given) For square matrices n is almost always m. 4084 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4085 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4086 . i - row indices 4087 . j - column indices 4088 - a - matrix values 4089 4090 Output Parameter: 4091 . mat - the matrix 4092 4093 Level: intermediate 4094 4095 Notes: 4096 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4097 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4098 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4099 4100 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4101 4102 .keywords: matrix, aij, compressed row, sparse, parallel 4103 4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4105 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4106 @*/ 4107 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4108 { 4109 PetscErrorCode ierr; 4110 4111 PetscFunctionBegin; 4112 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4113 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4114 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4115 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4116 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4117 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4118 PetscFunctionReturn(0); 4119 } 4120