1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.1 1996/02/09 21:28:31 bsmith Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n,*idx,i; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 n = a->n; 29 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 30 for ( i=0; i<n; i++ ) idx[i] = i; 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 32 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 33 PetscFree(idx); 34 ISSetPermutation(*rperm); 35 ISSetPermutation(*cperm); 36 ISSetIdentity(*rperm); 37 ISSetIdentity(*cperm); 38 return 0; 39 } 40 41 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,a->indexshift,&ia,&ja);CHKERRQ(ierr); 42 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 43 44 PetscFree(ia); PetscFree(ja); 45 return 0; 46 } 47 48 49 #include "draw.h" 50 #include "pinclude/pviewer.h" 51 #include "sysio.h" 52 53 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 56 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 57 Scalar *aa; 58 59 ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr); 60 col_lens = (int *) PetscMalloc( (4+a->m)*sizeof(int) ); CHKPTRQ(col_lens); 61 col_lens[0] = MAT_COOKIE; 62 col_lens[1] = a->m; 63 col_lens[2] = a->n; 64 col_lens[3] = a->nz*bs*bs; 65 66 /* store lengths of each row and write (including header) to file */ 67 count = 0; 68 for ( i=0; i<a->mbs; i++ ) { 69 for ( j=0; j<bs; j++ ) { 70 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 71 } 72 } 73 ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr); 74 PetscFree(col_lens); 75 76 /* store column indices (zero start index) */ 77 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 78 count = 0; 79 if (!a->indexshift) { 80 for ( i=0; i<a->mbs; i++ ) { 81 for ( j=0; j<bs; j++ ) { 82 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 83 for ( l=0; l<bs; l++ ) { 84 jj[count++] = bs*a->j[k] + l; 85 /* printf(" count %d jj %d row %d\n",count-1,jj[count-1],bs*i+j);*/ 86 } 87 } 88 } 89 } 90 } else { 91 SETERRQ(1,"Not yet done"); 92 } 93 ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr); 94 PetscFree(jj); 95 96 /* store nonzero values */ 97 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 98 count = 0; 99 for ( i=0; i<a->mbs; i++ ) { 100 for ( j=0; j<bs; j++ ) { 101 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 102 for ( l=0; l<bs; l++ ) { 103 aa[count++] = a->a[bs*bs*k + l*bs + j]; 104 } 105 } 106 } 107 } 108 ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr); 109 PetscFree(aa); 110 return 0; 111 } 112 113 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 114 { 115 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 116 int ierr, i,j, shift = a->indexshift,format,bs = a->bs,k,l; 117 FILE *fd; 118 char *outputname; 119 120 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 121 ierr = ViewerFileGetOutputname_Private(viewer,&outputname); CHKERRQ(ierr); 122 ierr = ViewerFileGetFormat_Private(viewer,&format); 123 if (format == FILE_FORMAT_INFO) { 124 /* no need to print additional information */ ; 125 } 126 else if (format == FILE_FORMAT_MATLAB) { 127 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 128 } 129 else { 130 for ( i=0; i<a->mbs; i++ ) { 131 for ( j=0; j<bs; j++ ) { 132 fprintf(fd,"row %d:",i*bs+j); 133 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 134 for ( l=0; l<bs; l++ ) { 135 fprintf(fd," %d %g",bs*a->j[k]+l-shift,a->a[bs*bs*k + l*bs + j]); 136 } 137 } 138 fprintf(fd,"\n"); 139 } 140 } 141 } 142 fflush(fd); 143 return 0; 144 } 145 146 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 147 { 148 Mat A = (Mat) obj; 149 PetscObject vobj = (PetscObject) viewer; 150 151 if (!viewer) { 152 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 153 } 154 if (vobj->cookie == VIEWER_COOKIE) { 155 if (vobj->type == MATLAB_VIEWER) { 156 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 157 } 158 else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){ 159 return MatView_SeqBAIJ_ASCII(A,viewer); 160 } 161 else if (vobj->type == BINARY_FILE_VIEWER) { 162 return MatView_SeqBAIJ_Binary(A,viewer); 163 } 164 } 165 else if (vobj->cookie == DRAW_COOKIE) { 166 if (vobj->type == NULLWINDOW) return 0; 167 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 168 } 169 return 0; 170 } 171 172 173 static int MatZeroEntries_SeqBAIJ(Mat A) 174 { 175 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 176 PetscMemzero(a->a,a->bs*a->bs*(a->i[a->mbs]+a->indexshift)*sizeof(Scalar)); 177 return 0; 178 } 179 180 int MatDestroy_SeqBAIJ(PetscObject obj) 181 { 182 Mat A = (Mat) obj; 183 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 184 185 #if defined(PETSC_LOG) 186 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 187 #endif 188 PetscFree(a->a); 189 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 190 if (a->diag) PetscFree(a->diag); 191 if (a->ilen) PetscFree(a->ilen); 192 if (a->imax) PetscFree(a->imax); 193 if (a->solve_work) PetscFree(a->solve_work); 194 PetscFree(a); 195 PLogObjectDestroy(A); 196 PetscHeaderDestroy(A); 197 return 0; 198 } 199 200 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 201 { 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 203 if (op == ROW_ORIENTED) a->roworiented = 1; 204 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 205 else if (op == COLUMNS_SORTED) a->sorted = 1; 206 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 207 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 208 else if (op == ROWS_SORTED || 209 op == SYMMETRIC_MATRIX || 210 op == STRUCTURALLY_SYMMETRIC_MATRIX || 211 op == YES_NEW_DIAGONALS) 212 PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 213 else if (op == NO_NEW_DIAGONALS) 214 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 215 else 216 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 217 return 0; 218 } 219 220 221 /* -------------------------------------------------------*/ 222 /* Should check that shapes of vectors and matrices match */ 223 /* -------------------------------------------------------*/ 224 #include "pinclude/plapack.h" 225 226 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 227 { 228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 229 Scalar *xg,*yg; 230 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 231 register Scalar x1,x2,x3,x4,x5; 232 int mbs = a->mbs, m = a->m, i, *idx,*ii; 233 int bs = a->bs,j,n,bs2 = bs*bs; 234 235 if (a->indexshift) SETERRQ(PETSC_ERR_SUP,"MatMult_SeqBAIJ:index shift no"); 236 237 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 238 PetscMemzero(y,m*sizeof(Scalar)); 239 x = x; 240 idx = a->j; 241 v = a->a; 242 ii = a->i; 243 244 switch (bs) { 245 case 1: 246 for ( i=0; i<m; i++ ) { 247 n = ii[1] - ii[0]; ii++; 248 sum = 0.0; 249 while (n--) sum += *v++ * x[*idx++]; 250 y[i] = sum; 251 } 252 break; 253 case 2: 254 for ( i=0; i<mbs; i++ ) { 255 n = ii[1] - ii[0]; ii++; /* number of blocks in row */ 256 sum1 = 0.0; sum2 = 0.0; 257 for ( j=0; j<n; j++ ) { 258 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 259 sum1 += v[0]*x1 + v[2]*x2; 260 sum2 += v[1]*x1 + v[3]*x2; 261 v += 4; 262 } 263 y[0] += sum1; y[1] += sum2; 264 y += 2; 265 } 266 break; 267 case 3: 268 for ( i=0; i<mbs; i++ ) { 269 n = ii[1] - ii[0]; ii++; /* number of blocks in row */ 270 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 271 for ( j=0; j<n; j++ ) { 272 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 273 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 274 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 275 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 276 v += 9; 277 } 278 y[0] += sum1; y[1] += sum2; y[2] += sum3; 279 y += 3; 280 } 281 break; 282 case 4: 283 for ( i=0; i<mbs; i++ ) { 284 n = ii[1] - ii[0]; ii++; /* number of blocks in row */ 285 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 286 for ( j=0; j<n; j++ ) { 287 xb = x + 4*(*idx++); 288 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 289 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 290 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 291 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 292 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 293 v += 16; 294 } 295 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 296 y += 4; 297 } 298 break; 299 case 5: 300 for ( i=0; i<mbs; i++ ) { 301 n = ii[1] - ii[0]; ii++; /* number of blocks in row */ 302 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 303 for ( j=0; j<n; j++ ) { 304 xb = x + 5*(*idx++); 305 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 306 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 307 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 308 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 309 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 310 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 311 v += 25; 312 } 313 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 314 y += 5; 315 } 316 break; 317 /* block sizes larger then 5 by 5 are handled by BLAS */ 318 default: { 319 int _One = 1; Scalar _DOne = 1.0; 320 for ( i=0; i<mbs; i++ ) { 321 n = ii[1] - ii[0]; ii++; /* number of blocks in row */ 322 for ( j=0; j<n; j++ ) { 323 xb = x + bs*(*idx++); 324 /* do dense mat-vec product */ 325 LAgemv_("N",&bs,&bs,&_DOne,v,&bs,xb,&_One,&_DOne,y,&_One); 326 v += bs2; 327 } 328 y += bs; 329 } 330 } 331 } 332 PLogFlops(2*bs2*a->nz - m); 333 return 0; 334 } 335 336 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nzalloc,int *mem) 337 { 338 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 339 *nz = a->nz; 340 *nzalloc = a->maxnz; 341 *mem = (int)A->mem; 342 return 0; 343 } 344 345 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 346 extern int MatLUFactorNumeric_SeqBAIJ(Mat,Mat*); 347 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 348 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 349 350 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 351 { 352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 353 *m = a->m; *n = a->n; 354 return 0; 355 } 356 357 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 358 { 359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360 *m = 0; *n = a->m; 361 return 0; 362 } 363 364 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 367 Scalar *v = a->a; 368 double sum = 0.0; 369 int i; 370 371 if (type == NORM_FROBENIUS) { 372 for (i=0; i<a->nz; i++ ) { 373 #if defined(PETSC_COMPLEX) 374 sum += real(conj(*v)*(*v)); v++; 375 #else 376 sum += (*v)*(*v); v++; 377 #endif 378 } 379 *norm = sqrt(sum); 380 } 381 else { 382 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 383 } 384 return 0; 385 } 386 387 388 /* 389 note: This can only work for identity for row and col. It would 390 be good to check this and otherwise generate an error. 391 */ 392 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 393 { 394 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 395 int ierr; 396 Mat outA; 397 398 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 399 400 outA = inA; 401 inA->factor = FACTOR_LU; 402 a->row = row; 403 a->col = col; 404 405 a->solve_work = (Scalar *) PetscMalloc((a->m+1)*sizeof(Scalar));CHKPTRQ(a->solve_work); 406 407 /* 408 if (!a->diag) { 409 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 410 } 411 ierr = MatLUFactorNumeric_SeqBAIJ(inA,&outA); CHKERRQ(ierr); 412 */ 413 return 0; 414 } 415 416 #include "pinclude/plapack.h" 417 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 418 { 419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 420 int one = 1, totalnz = a->bs*a->bs*a->nz; 421 BLscal_( &totalnz, alpha, a->a, &one ); 422 PLogFlops(totalnz); 423 return 0; 424 } 425 426 int MatPrintHelp_SeqBAIJ(Mat A) 427 { 428 static int called = 0; 429 MPI_Comm comm = A->comm; 430 431 if (called) return 0; else called = 1; 432 MPIU_printf(comm," Options for MATSeqBAIJ and MATMPIAIJ matrix formats (the defaults):\n"); 433 MPIU_printf(comm," -mat_lu_pivotthreshold <threshold>\n"); 434 MPIU_printf(comm," -mat_aij_oneindex - internal indices begin at 1 instead of the default 0.\n"); 435 return 0; 436 } 437 /* -------------------------------------------------------------------*/ 438 static struct _MatOps MatOps = {0, 439 0,0, 440 MatMult_SeqBAIJ,0, 441 0,0, 442 /*MatSolve_SeqBAIJ*/ 0,0, 443 0,0, 444 /*MatLUFactor_SeqBAIJ*/0,0, 445 0, 446 0, 447 MatGetInfo_SeqBAIJ,0, 448 0,0,MatNorm_SeqBAIJ, 449 0,0, 450 0, 451 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 452 MatGetReordering_SeqBAIJ, 453 /*MatLUFactorSymbolic_SeqBAIJ */0,/* MatLUFactorNumeric_SeqBAIJ*/ 0,0,0, 454 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 455 /* MatILUFactorSymbolic_SeqBAIJ */ 0,0, 456 0,0,/*MatConvert_SeqBAIJ*/ 0, 457 0,0, 458 MatConvertSameType_SeqBAIJ,0,0, 459 MatILUFactor_SeqBAIJ,0,0, 460 0,0, 461 0,0, 462 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 463 0}; 464 465 /*@C 466 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 467 (the default parallel PETSc format). For good matrix assembly performance 468 the user should preallocate the matrix storage by setting the parameter nz 469 (or nzz). By setting these parameters accurately, performance can be 470 increased by more than a factor of 50. 471 472 Input Parameters: 473 . comm - MPI communicator, set to MPI_COMM_SELF 474 . bs - size of block 475 . m - number of rows 476 . n - number of columns 477 . nz - number of block nonzeros per block row (same for all rows) 478 . nzz - number of block nonzeros per block row or PETSC_NULL 479 (possibly different for each row) 480 481 Output Parameter: 482 . A - the matrix 483 484 Notes: 485 The BAIJ format, is fully compatible with standard Fortran 77 486 storage. That is, the stored row and column indices can begin at 487 either one (as in Fortran) or zero. See the users manual for details. 488 489 Specify the preallocated storage with either nz or nnz (not both). 490 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 491 allocation. For additional details, see the users manual chapter on 492 matrices and the file $(PETSC_DIR)/Performance. 493 494 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 495 @*/ 496 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 497 { 498 Mat B; 499 Mat_SeqBAIJ *b; 500 int i,len,ierr, flg,mbs = m/bs; 501 502 if (mbs*bs != m) 503 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 504 505 *A = 0; 506 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 507 PLogObjectCreate(B); 508 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 509 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 510 B->destroy = MatDestroy_SeqBAIJ; 511 B->view = MatView_SeqBAIJ; 512 B->factor = 0; 513 B->lupivotthreshold = 1.0; 514 ierr = OptionsGetDouble(PETSC_NULL,"-mat_lu_pivotthreshold",&B->lupivotthreshold, \ 515 &flg); CHKERRQ(ierr); 516 b->row = 0; 517 b->col = 0; 518 b->indexshift = 0; 519 b->reallocs = 0; 520 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_oneindex", &flg); CHKERRQ(ierr); 521 if (flg) b->indexshift = -1; 522 523 b->m = m; 524 b->mbs = mbs; 525 b->n = n; 526 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 527 if (nnz == PETSC_NULL) { 528 if (nz == PETSC_DEFAULT) nz = 10; 529 else if (nz <= 0) nz = 1; 530 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 531 nz = nz*mbs; 532 } 533 else { 534 nz = 0; 535 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 536 } 537 538 /* allocate the matrix space */ 539 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 540 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 541 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 542 b->j = (int *) (b->a + nz*bs*bs); 543 PetscMemzero(b->j,nz*sizeof(int)); 544 b->i = b->j + nz; 545 b->singlemalloc = 1; 546 547 b->i[0] = -b->indexshift; 548 for (i=1; i<mbs+1; i++) { 549 b->i[i] = b->i[i-1] + b->imax[i-1]; 550 } 551 552 /* b->ilen will count nonzeros in each block row so far. */ 553 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 554 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 555 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 556 557 b->bs = bs; 558 b->mbs = mbs; 559 b->nz = 0; 560 b->maxnz = nz; 561 b->sorted = 0; 562 b->roworiented = 1; 563 b->nonew = 0; 564 b->diag = 0; 565 b->solve_work = 0; 566 b->spptr = 0; 567 568 *A = B; 569 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 570 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 571 return 0; 572 } 573 574 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 575 { 576 Mat C; 577 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 578 int i,len, shift = a->indexshift, mbs = a->mbs, bs = a->bs; 579 580 *B = 0; 581 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 582 PLogObjectCreate(C); 583 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 584 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 585 C->destroy = MatDestroy_SeqBAIJ; 586 C->view = MatView_SeqBAIJ; 587 C->factor = A->factor; 588 c->row = 0; 589 c->col = 0; 590 c->indexshift = shift; 591 C->assembled = PETSC_TRUE; 592 593 c->m = a->m; 594 c->n = a->n; 595 c->bs = a->bs; 596 c->mbs = a->mbs; 597 c->nbs = a->nbs; 598 599 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 600 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 601 for ( i=0; i<mbs; i++ ) { 602 c->imax[i] = a->imax[i]; 603 c->ilen[i] = a->ilen[i]; 604 } 605 606 /* allocate the matrix space */ 607 c->singlemalloc = 1; 608 len = (mbs+1)*sizeof(int)+(a->i[mbs])*(bs*bs*sizeof(Scalar)+sizeof(int)); 609 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 610 c->j = (int *) (c->a + a->i[mbs]*bs*bs + shift); 611 c->i = c->j + a->i[mbs] + shift; 612 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 613 if (mbs > 0) { 614 PetscMemcpy(c->j,a->j,(a->i[mbs]+shift)*sizeof(int)); 615 if (cpvalues == COPY_VALUES) { 616 PetscMemcpy(c->a,a->a,(bs*bs*a->i[mbs]+shift)*sizeof(Scalar)); 617 } 618 } 619 620 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 621 c->sorted = a->sorted; 622 c->roworiented = a->roworiented; 623 c->nonew = a->nonew; 624 625 if (a->diag) { 626 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 627 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 628 for ( i=0; i<mbs; i++ ) { 629 c->diag[i] = a->diag[i]; 630 } 631 } 632 else c->diag = 0; 633 c->nz = a->nz; 634 c->maxnz = a->maxnz; 635 c->solve_work = 0; 636 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 637 638 *B = C; 639 return 0; 640 } 641 642 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A) 643 { 644 Mat_SeqBAIJ *a; 645 Mat B; 646 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,shift,bs=1,flg; 647 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 648 int kmax,jcount,block,idx,point,nzcountb; 649 Scalar *aa; 650 PetscObject vobj = (PetscObject) bview; 651 MPI_Comm comm = vobj->comm; 652 653 ierr = OptionsGetInt(PETSC_NULL,"-mat_baij_bs",&bs,&flg);CHKERRQ(ierr); 654 655 MPI_Comm_size(comm,&size); 656 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 657 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 658 ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr); 659 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object in file"); 660 M = header[1]; N = header[2]; nz = header[3]; 661 662 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 663 mbs = M/bs; 664 if (bs*mbs != M) SETERRQ(1,"MatLoad_SeqBAIJ:Rows not divisble by blocksize"); 665 666 /* read in row lengths */ 667 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 668 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 669 670 /* read in column indices */ 671 jj = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(jj); 672 ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr); 673 674 /* loop over row lengths determining block row lengths */ 675 browlengths = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(browlengths); 676 PetscMemzero(browlengths,mbs*sizeof(int)); 677 mask = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(mask); 678 rowcount = 0; nzcount = 0; 679 for ( i=0; i<mbs; i++ ) { 680 PetscMemzero(mask,mbs*sizeof(int)); 681 for ( j=0; j<bs; j++ ) { 682 kmax = rowlengths[rowcount]; 683 for ( k=0; k<kmax; k++ ) { 684 mask[jj[nzcount++]/bs] = 1; 685 } 686 rowcount++; 687 } 688 for ( j=0; j<mbs; j++ ) { 689 if (mask[j]) browlengths[i]++; 690 } 691 } 692 693 /* create our matrix */ 694 ierr = MatCreateSeqBAIJ(comm,bs,M,N,0,browlengths,A); CHKERRQ(ierr); 695 B = *A; 696 a = (Mat_SeqBAIJ *) B->data; 697 shift = a->indexshift; 698 699 /* set matrix "i" values */ 700 a->i[0] = -shift; 701 for ( i=1; i<= mbs; i++ ) { 702 a->i[i] = a->i[i-1] + browlengths[i-1]; 703 a->ilen[i-1] = browlengths[i-1]; 704 } 705 a->nz = 0; 706 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 707 708 /* read in nonzero values */ 709 aa = (Scalar *) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(aa); 710 ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr); 711 712 /* set "a" and "j" values into matrix */ 713 nzcount = 0; jcount = 0; 714 for ( i=0; i<mbs; i++ ) { 715 nzcountb = nzcount; 716 PetscMemzero(mask,mbs*sizeof(int)); 717 for ( j=0; j<bs; j++ ) { 718 kmax = rowlengths[i*bs+j]; 719 for ( k=0; k<kmax; k++ ) { 720 mask[jj[nzcount++]/bs] = 1; 721 } 722 rowcount++; 723 } 724 /* set "j" values into matrix */ 725 maskcount = 1; 726 for ( j=0; j<mbs; j++ ) { 727 if (mask[j]) { 728 a->j[jcount++] = j; 729 mask[j] = maskcount++; /* what nonzero block in this row is j */ 730 } 731 } 732 /* set "a" values into matrix */ 733 for ( j=0; j<bs; j++ ) { 734 kmax = rowlengths[i*bs+j]; 735 for ( k=0; k<kmax; k++ ) { 736 block = mask[jj[nzcountb]/bs] - 1; 737 point = jj[nzcountb] - bs*(jj[nzcountb]/bs); 738 idx = bs*bs*(a->i[i] + block) + j + bs*(point); 739 /* printf("block row %d subrow %d cold %d block %d idx %d val %g a->i[i] %d point %d\n",i,j,k,block,idx, 740 aa[nzcountb],a->i[i],point); */ 741 a->a[idx] = aa[nzcountb++]; 742 } 743 } 744 } 745 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Bad new"); 746 747 PetscFree(rowlengths); 748 PetscFree(browlengths); 749 PetscFree(aa); 750 PetscFree(jj); 751 PetscFree(mask); 752 753 B->assembled = PETSC_TRUE; 754 755 /* MatView(*A,STDOUT_VIEWER_SELF); */ 756 return 0; 757 } 758 759 760 761