1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.5 1996/02/18 00:40:34 bsmith Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr); 41 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 42 43 PetscFree(ia); PetscFree(ja); 44 return 0; 45 } 46 47 /* 48 Adds diagonal pointers to sparse matrix structure. 49 */ 50 51 int MatMarkDiag_SeqBAIJ(Mat A) 52 { 53 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 54 int i,j, *diag, m = a->mbs; 55 56 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 57 PLogObjectMemory(A,(m+1)*sizeof(int)); 58 for ( i=0; i<m; i++ ) { 59 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 60 if (a->j[j] == i) { 61 diag[i] = j; 62 break; 63 } 64 } 65 } 66 a->diag = diag; 67 return 0; 68 } 69 70 #include "draw.h" 71 #include "pinclude/pviewer.h" 72 #include "sysio.h" 73 74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 78 Scalar *aa; 79 80 ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr); 81 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 82 col_lens[0] = MAT_COOKIE; 83 col_lens[1] = a->m; 84 col_lens[2] = a->n; 85 col_lens[3] = a->nz*bs*bs; 86 87 /* store lengths of each row and write (including header) to file */ 88 count = 0; 89 for ( i=0; i<a->mbs; i++ ) { 90 for ( j=0; j<bs; j++ ) { 91 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 92 } 93 } 94 ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr); 95 PetscFree(col_lens); 96 97 /* store column indices (zero start index) */ 98 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 99 count = 0; 100 for ( i=0; i<a->mbs; i++ ) { 101 for ( j=0; j<bs; j++ ) { 102 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 103 for ( l=0; l<bs; l++ ) { 104 jj[count++] = bs*a->j[k] + l; 105 } 106 } 107 } 108 } 109 ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr); 110 PetscFree(jj); 111 112 /* store nonzero values */ 113 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 118 for ( l=0; l<bs; l++ ) { 119 aa[count++] = a->a[bs*bs*k + l*bs + j]; 120 } 121 } 122 } 123 } 124 ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr); 125 PetscFree(aa); 126 return 0; 127 } 128 129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 132 int ierr, i,j,format,bs = a->bs,k,l; 133 FILE *fd; 134 char *outputname; 135 136 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 137 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 138 ierr = ViewerFileGetFormat_Private(viewer,&format); 139 if (format == FILE_FORMAT_INFO) { 140 /* no need to print additional information */ ; 141 } 142 else if (format == FILE_FORMAT_MATLAB) { 143 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 144 } 145 else { 146 for ( i=0; i<a->mbs; i++ ) { 147 for ( j=0; j<bs; j++ ) { 148 fprintf(fd,"row %d:",i*bs+j); 149 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 150 for ( l=0; l<bs; l++ ) { 151 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 152 } 153 } 154 fprintf(fd,"\n"); 155 } 156 } 157 } 158 fflush(fd); 159 return 0; 160 } 161 162 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 163 { 164 Mat A = (Mat) obj; 165 PetscObject vobj = (PetscObject) viewer; 166 167 if (!viewer) { 168 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 169 } 170 if (vobj->cookie == VIEWER_COOKIE) { 171 if (vobj->type == MATLAB_VIEWER) { 172 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 173 } 174 else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){ 175 return MatView_SeqBAIJ_ASCII(A,viewer); 176 } 177 else if (vobj->type == BINARY_FILE_VIEWER) { 178 return MatView_SeqBAIJ_Binary(A,viewer); 179 } 180 } 181 else if (vobj->cookie == DRAW_COOKIE) { 182 if (vobj->type == NULLWINDOW) return 0; 183 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 184 } 185 return 0; 186 } 187 188 189 static int MatZeroEntries_SeqBAIJ(Mat A) 190 { 191 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 192 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 193 return 0; 194 } 195 196 int MatDestroy_SeqBAIJ(PetscObject obj) 197 { 198 Mat A = (Mat) obj; 199 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 200 201 #if defined(PETSC_LOG) 202 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 203 #endif 204 PetscFree(a->a); 205 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 206 if (a->diag) PetscFree(a->diag); 207 if (a->ilen) PetscFree(a->ilen); 208 if (a->imax) PetscFree(a->imax); 209 if (a->solve_work) PetscFree(a->solve_work); 210 if (a->mult_work) PetscFree(a->mult_work); 211 PetscFree(a); 212 PLogObjectDestroy(A); 213 PetscHeaderDestroy(A); 214 return 0; 215 } 216 217 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 220 if (op == ROW_ORIENTED) a->roworiented = 1; 221 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 222 else if (op == COLUMNS_SORTED) a->sorted = 1; 223 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 224 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 225 else if (op == ROWS_SORTED || 226 op == SYMMETRIC_MATRIX || 227 op == STRUCTURALLY_SYMMETRIC_MATRIX || 228 op == YES_NEW_DIAGONALS) 229 PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 230 else if (op == NO_NEW_DIAGONALS) 231 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 232 else 233 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 234 return 0; 235 } 236 237 238 /* -------------------------------------------------------*/ 239 /* Should check that shapes of vectors and matrices match */ 240 /* -------------------------------------------------------*/ 241 #include "pinclude/plapack.h" 242 243 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 244 { 245 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 246 Scalar *xg,*yg; 247 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 248 register Scalar x1,x2,x3,x4,x5; 249 int mbs = a->mbs, m = a->m, i, *idx,*ii; 250 int bs = a->bs,j,n,bs2 = bs*bs; 251 252 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 253 PetscMemzero(y,m*sizeof(Scalar)); 254 x = x; 255 idx = a->j; 256 v = a->a; 257 ii = a->i; 258 259 switch (bs) { 260 case 1: 261 for ( i=0; i<m; i++ ) { 262 n = ii[1] - ii[0]; ii++; 263 sum = 0.0; 264 while (n--) sum += *v++ * x[*idx++]; 265 y[i] = sum; 266 } 267 break; 268 case 2: 269 for ( i=0; i<mbs; i++ ) { 270 n = ii[1] - ii[0]; ii++; 271 sum1 = 0.0; sum2 = 0.0; 272 for ( j=0; j<n; j++ ) { 273 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 274 sum1 += v[0]*x1 + v[2]*x2; 275 sum2 += v[1]*x1 + v[3]*x2; 276 v += 4; 277 } 278 y[0] += sum1; y[1] += sum2; 279 y += 2; 280 } 281 break; 282 case 3: 283 for ( i=0; i<mbs; i++ ) { 284 n = ii[1] - ii[0]; ii++; 285 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 286 for ( j=0; j<n; j++ ) { 287 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 288 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 289 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 290 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 291 v += 9; 292 } 293 y[0] += sum1; y[1] += sum2; y[2] += sum3; 294 y += 3; 295 } 296 break; 297 case 4: 298 for ( i=0; i<mbs; i++ ) { 299 n = ii[1] - ii[0]; ii++; 300 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 301 for ( j=0; j<n; j++ ) { 302 xb = x + 4*(*idx++); 303 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 304 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 305 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 306 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 307 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 308 v += 16; 309 } 310 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 311 y += 4; 312 } 313 break; 314 case 5: 315 for ( i=0; i<mbs; i++ ) { 316 n = ii[1] - ii[0]; ii++; 317 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 318 for ( j=0; j<n; j++ ) { 319 xb = x + 5*(*idx++); 320 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 321 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 322 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 323 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 324 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 325 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 326 v += 25; 327 } 328 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 329 y += 5; 330 } 331 break; 332 /* block sizes larger then 5 by 5 are handled by BLAS */ 333 default: { 334 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 335 if (!a->mult_work) { 336 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 337 CHKPTRQ(a->mult_work); 338 } 339 work = a->mult_work; 340 for ( i=0; i<mbs; i++ ) { 341 n = ii[1] - ii[0]; ii++; 342 ncols = n*bs; 343 workt = work; 344 for ( j=0; j<n; j++ ) { 345 xb = x + bs*(*idx++); 346 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 347 workt += bs; 348 } 349 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 350 v += n*bs2; 351 y += bs; 352 } 353 } 354 } 355 PLogFlops(2*bs2*a->nz - m); 356 return 0; 357 } 358 359 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 360 { 361 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 362 *nz = a->bs*a->bs*a->nz; 363 *nza = a->maxnz; 364 *mem = (int)A->mem; 365 return 0; 366 } 367 368 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 369 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 370 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 371 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 372 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 373 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 374 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 375 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 376 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 377 378 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 379 { 380 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 381 *m = a->m; *n = a->n; 382 return 0; 383 } 384 385 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 386 { 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 388 *m = 0; *n = a->m; 389 return 0; 390 } 391 392 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 393 { 394 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 395 Scalar *v = a->a; 396 double sum = 0.0; 397 int i; 398 399 if (type == NORM_FROBENIUS) { 400 for (i=0; i<a->nz; i++ ) { 401 #if defined(PETSC_COMPLEX) 402 sum += real(conj(*v)*(*v)); v++; 403 #else 404 sum += (*v)*(*v); v++; 405 #endif 406 } 407 *norm = sqrt(sum); 408 } 409 else { 410 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 411 } 412 return 0; 413 } 414 415 /* 416 note: This can only work for identity for row and col. It would 417 be good to check this and otherwise generate an error. 418 */ 419 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 420 { 421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 422 Mat outA; 423 int ierr; 424 425 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 426 427 outA = inA; 428 inA->factor = FACTOR_LU; 429 a->row = row; 430 a->col = col; 431 432 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 433 434 if (!a->diag) { 435 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 436 } 437 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 438 return 0; 439 } 440 441 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 442 { 443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 444 int one = 1, totalnz = a->bs*a->bs*a->nz; 445 BLscal_( &totalnz, alpha, a->a, &one ); 446 PLogFlops(totalnz); 447 return 0; 448 } 449 450 int MatPrintHelp_SeqBAIJ(Mat A) 451 { 452 static int called = 0; 453 454 if (called) return 0; else called = 1; 455 return 0; 456 } 457 /* -------------------------------------------------------------------*/ 458 static struct _MatOps MatOps = {0, 459 0,0, 460 MatMult_SeqBAIJ,0, 461 0,0, 462 MatSolve_SeqBAIJ,0, 463 0,0, 464 MatLUFactor_SeqBAIJ,0, 465 0, 466 0, 467 MatGetInfo_SeqBAIJ,0, 468 0,0,MatNorm_SeqBAIJ, 469 0,0, 470 0, 471 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 472 MatGetReordering_SeqBAIJ, 473 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 474 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 475 MatILUFactorSymbolic_SeqBAIJ,0, 476 0,0,/*MatConvert_SeqBAIJ*/ 0, 477 0,0, 478 MatConvertSameType_SeqBAIJ,0,0, 479 MatILUFactor_SeqBAIJ,0,0, 480 0,0, 481 0,0, 482 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 483 0}; 484 485 /*@C 486 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 487 (the default parallel PETSc format). For good matrix assembly performance 488 the user should preallocate the matrix storage by setting the parameter nz 489 (or nzz). By setting these parameters accurately, performance can be 490 increased by more than a factor of 50. 491 492 Input Parameters: 493 . comm - MPI communicator, set to MPI_COMM_SELF 494 . bs - size of block 495 . m - number of rows 496 . n - number of columns 497 . nz - number of block nonzeros per block row (same for all rows) 498 . nzz - number of block nonzeros per block row or PETSC_NULL 499 (possibly different for each row) 500 501 Output Parameter: 502 . A - the matrix 503 504 Notes: 505 The BAIJ format, is fully compatible with standard Fortran 77 506 storage. That is, the stored row and column indices can begin at 507 either one (as in Fortran) or zero. See the users manual for details. 508 509 Specify the preallocated storage with either nz or nnz (not both). 510 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 511 allocation. For additional details, see the users manual chapter on 512 matrices and the file $(PETSC_DIR)/Performance. 513 514 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 515 @*/ 516 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 517 { 518 Mat B; 519 Mat_SeqBAIJ *b; 520 int i,len,ierr, flg,mbs = m/bs; 521 522 if (mbs*bs != m) 523 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 524 525 *A = 0; 526 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 527 PLogObjectCreate(B); 528 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 529 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 530 switch (bs) { 531 case 1: 532 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 533 break; 534 case 2: 535 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 536 break; 537 } 538 B->destroy = MatDestroy_SeqBAIJ; 539 B->view = MatView_SeqBAIJ; 540 B->factor = 0; 541 B->lupivotthreshold = 1.0; 542 b->row = 0; 543 b->col = 0; 544 b->reallocs = 0; 545 546 b->m = m; 547 b->mbs = mbs; 548 b->n = n; 549 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 550 if (nnz == PETSC_NULL) { 551 if (nz == PETSC_DEFAULT) nz = 5; 552 else if (nz <= 0) nz = 1; 553 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 554 nz = nz*mbs; 555 } 556 else { 557 nz = 0; 558 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 559 } 560 561 /* allocate the matrix space */ 562 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 563 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 564 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 565 b->j = (int *) (b->a + nz*bs*bs); 566 PetscMemzero(b->j,nz*sizeof(int)); 567 b->i = b->j + nz; 568 b->singlemalloc = 1; 569 570 b->i[0] = 0; 571 for (i=1; i<mbs+1; i++) { 572 b->i[i] = b->i[i-1] + b->imax[i-1]; 573 } 574 575 /* b->ilen will count nonzeros in each block row so far. */ 576 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 577 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 578 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 579 580 b->bs = bs; 581 b->mbs = mbs; 582 b->nz = 0; 583 b->maxnz = nz; 584 b->sorted = 0; 585 b->roworiented = 1; 586 b->nonew = 0; 587 b->diag = 0; 588 b->solve_work = 0; 589 b->mult_work = 0; 590 b->spptr = 0; 591 592 *A = B; 593 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 594 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 595 return 0; 596 } 597 598 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 599 { 600 Mat C; 601 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 602 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 603 604 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 605 606 *B = 0; 607 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 608 PLogObjectCreate(C); 609 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 610 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 611 C->destroy = MatDestroy_SeqBAIJ; 612 C->view = MatView_SeqBAIJ; 613 C->factor = A->factor; 614 c->row = 0; 615 c->col = 0; 616 C->assembled = PETSC_TRUE; 617 618 c->m = a->m; 619 c->n = a->n; 620 c->bs = a->bs; 621 c->mbs = a->mbs; 622 c->nbs = a->nbs; 623 624 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 625 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 626 for ( i=0; i<mbs; i++ ) { 627 c->imax[i] = a->imax[i]; 628 c->ilen[i] = a->ilen[i]; 629 } 630 631 /* allocate the matrix space */ 632 c->singlemalloc = 1; 633 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 634 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 635 c->j = (int *) (c->a + nz*bs*bs); 636 c->i = c->j + nz; 637 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 638 if (mbs > 0) { 639 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 640 if (cpvalues == COPY_VALUES) { 641 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 642 } 643 } 644 645 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 646 c->sorted = a->sorted; 647 c->roworiented = a->roworiented; 648 c->nonew = a->nonew; 649 650 if (a->diag) { 651 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 652 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 653 for ( i=0; i<mbs; i++ ) { 654 c->diag[i] = a->diag[i]; 655 } 656 } 657 else c->diag = 0; 658 c->nz = a->nz; 659 c->maxnz = a->maxnz; 660 c->solve_work = 0; 661 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 662 c->mult_work = 0; 663 *B = C; 664 return 0; 665 } 666 667 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A) 668 { 669 Mat_SeqBAIJ *a; 670 Mat B; 671 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 672 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 673 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 674 int *masked, nmask,tmp,ishift,bs2; 675 Scalar *aa; 676 PetscObject vobj = (PetscObject) bview; 677 MPI_Comm comm = vobj->comm; 678 679 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 680 bs2 = bs*bs; 681 682 MPI_Comm_size(comm,&size); 683 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 684 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 685 ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr); 686 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 687 M = header[1]; N = header[2]; nz = header[3]; 688 689 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 690 691 /* 692 This code adds extra rows to make sure the number of rows is 693 divisible by the blocksize 694 */ 695 mbs = M/bs; 696 extra_rows = bs - M + bs*(mbs); 697 if (extra_rows == bs) extra_rows = 0; 698 else mbs++; 699 if (extra_rows) { 700 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 701 } 702 703 /* read in row lengths */ 704 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 705 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 706 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 707 708 /* read in column indices */ 709 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 710 ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr); 711 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 712 713 /* loop over row lengths determining block row lengths */ 714 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 715 PetscMemzero(browlengths,mbs*sizeof(int)); 716 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 717 PetscMemzero(mask,mbs*sizeof(int)); 718 masked = mask + mbs; 719 rowcount = 0; nzcount = 0; 720 for ( i=0; i<mbs; i++ ) { 721 nmask = 0; 722 for ( j=0; j<bs; j++ ) { 723 kmax = rowlengths[rowcount]; 724 for ( k=0; k<kmax; k++ ) { 725 tmp = jj[nzcount++]/bs; 726 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 727 } 728 rowcount++; 729 } 730 browlengths[i] += nmask; 731 /* zero out the mask elements we set */ 732 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 733 } 734 735 /* create our matrix */ 736 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 737 CHKERRQ(ierr); 738 B = *A; 739 a = (Mat_SeqBAIJ *) B->data; 740 741 /* set matrix "i" values */ 742 a->i[0] = 0; 743 for ( i=1; i<= mbs; i++ ) { 744 a->i[i] = a->i[i-1] + browlengths[i-1]; 745 a->ilen[i-1] = browlengths[i-1]; 746 } 747 a->nz = 0; 748 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 749 750 /* read in nonzero values */ 751 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 752 ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr); 753 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 754 755 /* set "a" and "j" values into matrix */ 756 nzcount = 0; jcount = 0; 757 for ( i=0; i<mbs; i++ ) { 758 nzcountb = nzcount; 759 nmask = 0; 760 for ( j=0; j<bs; j++ ) { 761 kmax = rowlengths[i*bs+j]; 762 for ( k=0; k<kmax; k++ ) { 763 tmp = jj[nzcount++]/bs; 764 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 765 } 766 rowcount++; 767 } 768 /* sort the masked values */ 769 SYIsort(nmask,masked); 770 771 /* set "j" values into matrix */ 772 maskcount = 1; 773 for ( j=0; j<nmask; j++ ) { 774 a->j[jcount++] = masked[j]; 775 mask[masked[j]] = maskcount++; 776 } 777 /* set "a" values into matrix */ 778 ishift = bs2*a->i[i]; 779 for ( j=0; j<bs; j++ ) { 780 kmax = rowlengths[i*bs+j]; 781 for ( k=0; k<kmax; k++ ) { 782 tmp = jj[nzcountb]/bs ; 783 block = mask[tmp] - 1; 784 point = jj[nzcountb] - bs*tmp; 785 idx = ishift + bs2*block + j + bs*point; 786 a->a[idx] = aa[nzcountb++]; 787 } 788 } 789 /* zero out the mask elements we set */ 790 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 791 } 792 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 793 794 PetscFree(rowlengths); 795 PetscFree(browlengths); 796 PetscFree(aa); 797 PetscFree(jj); 798 PetscFree(mask); 799 800 B->assembled = PETSC_TRUE; 801 802 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 803 if (flg) { 804 Viewer viewer; 805 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 806 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr); 807 ierr = MatView(B,viewer); CHKERRQ(ierr); 808 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 809 } 810 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 811 if (flg) { 812 Viewer viewer; 813 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 814 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 815 ierr = MatView(B,viewer); CHKERRQ(ierr); 816 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 817 } 818 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 819 if (flg) { 820 Viewer viewer; 821 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 822 ierr = MatView(B,viewer); CHKERRQ(ierr); 823 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 824 } 825 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 826 if (flg) { 827 Viewer viewer; 828 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 829 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr); 830 ierr = MatView(B,viewer); CHKERRQ(ierr); 831 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 832 } 833 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 834 if (flg) { 835 Draw win; 836 ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr); 837 ierr = MatView(B,(Viewer)win); CHKERRQ(ierr); 838 ierr = DrawSyncFlush(win); CHKERRQ(ierr); 839 ierr = DrawDestroy(win); CHKERRQ(ierr); 840 } 841 return 0; 842 } 843 844 845 846