1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.8 1996/02/23 22:03:48 bsmith Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr); 41 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 42 43 PetscFree(ia); PetscFree(ja); 44 return 0; 45 } 46 47 /* 48 Adds diagonal pointers to sparse matrix structure. 49 */ 50 51 int MatMarkDiag_SeqBAIJ(Mat A) 52 { 53 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 54 int i,j, *diag, m = a->mbs; 55 56 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 57 PLogObjectMemory(A,(m+1)*sizeof(int)); 58 for ( i=0; i<m; i++ ) { 59 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 60 if (a->j[j] == i) { 61 diag[i] = j; 62 break; 63 } 64 } 65 } 66 a->diag = diag; 67 return 0; 68 } 69 70 #include "draw.h" 71 #include "pinclude/pviewer.h" 72 #include "sysio.h" 73 74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 78 Scalar *aa; 79 80 ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr); 81 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 82 col_lens[0] = MAT_COOKIE; 83 col_lens[1] = a->m; 84 col_lens[2] = a->n; 85 col_lens[3] = a->nz*bs*bs; 86 87 /* store lengths of each row and write (including header) to file */ 88 count = 0; 89 for ( i=0; i<a->mbs; i++ ) { 90 for ( j=0; j<bs; j++ ) { 91 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 92 } 93 } 94 ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr); 95 PetscFree(col_lens); 96 97 /* store column indices (zero start index) */ 98 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 99 count = 0; 100 for ( i=0; i<a->mbs; i++ ) { 101 for ( j=0; j<bs; j++ ) { 102 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 103 for ( l=0; l<bs; l++ ) { 104 jj[count++] = bs*a->j[k] + l; 105 } 106 } 107 } 108 } 109 ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr); 110 PetscFree(jj); 111 112 /* store nonzero values */ 113 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 118 for ( l=0; l<bs; l++ ) { 119 aa[count++] = a->a[bs*bs*k + l*bs + j]; 120 } 121 } 122 } 123 } 124 ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr); 125 PetscFree(aa); 126 return 0; 127 } 128 129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 132 int ierr, i,j,format,bs = a->bs,k,l; 133 FILE *fd; 134 char *outputname; 135 136 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 137 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 138 ierr = ViewerFileGetFormat_Private(viewer,&format); 139 if (format == FILE_FORMAT_INFO) { 140 /* no need to print additional information */ ; 141 } 142 else if (format == FILE_FORMAT_MATLAB) { 143 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 144 } 145 else { 146 for ( i=0; i<a->mbs; i++ ) { 147 for ( j=0; j<bs; j++ ) { 148 fprintf(fd,"row %d:",i*bs+j); 149 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 150 for ( l=0; l<bs; l++ ) { 151 #if defined(PETSC_COMPLEX) 152 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 153 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 154 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 155 } 156 else { 157 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 158 } 159 #else 160 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 161 #endif 162 } 163 } 164 fprintf(fd,"\n"); 165 } 166 } 167 } 168 fflush(fd); 169 return 0; 170 } 171 172 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 173 { 174 Mat A = (Mat) obj; 175 PetscObject vobj = (PetscObject) viewer; 176 177 if (!viewer) { 178 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 179 } 180 if (vobj->cookie == VIEWER_COOKIE) { 181 if (vobj->type == MATLAB_VIEWER) { 182 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 183 } 184 else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){ 185 return MatView_SeqBAIJ_ASCII(A,viewer); 186 } 187 else if (vobj->type == BINARY_FILE_VIEWER) { 188 return MatView_SeqBAIJ_Binary(A,viewer); 189 } 190 } 191 else if (vobj->cookie == DRAW_COOKIE) { 192 if (vobj->type == NULLWINDOW) return 0; 193 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 194 } 195 return 0; 196 } 197 198 199 static int MatZeroEntries_SeqBAIJ(Mat A) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 202 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 203 return 0; 204 } 205 206 int MatDestroy_SeqBAIJ(PetscObject obj) 207 { 208 Mat A = (Mat) obj; 209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 210 211 #if defined(PETSC_LOG) 212 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 213 #endif 214 PetscFree(a->a); 215 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 216 if (a->diag) PetscFree(a->diag); 217 if (a->ilen) PetscFree(a->ilen); 218 if (a->imax) PetscFree(a->imax); 219 if (a->solve_work) PetscFree(a->solve_work); 220 if (a->mult_work) PetscFree(a->mult_work); 221 PetscFree(a); 222 PLogObjectDestroy(A); 223 PetscHeaderDestroy(A); 224 return 0; 225 } 226 227 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 if (op == ROW_ORIENTED) a->roworiented = 1; 231 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 232 else if (op == COLUMNS_SORTED) a->sorted = 1; 233 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 234 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 235 else if (op == ROWS_SORTED || 236 op == SYMMETRIC_MATRIX || 237 op == STRUCTURALLY_SYMMETRIC_MATRIX || 238 op == YES_NEW_DIAGONALS) 239 PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 240 else if (op == NO_NEW_DIAGONALS) 241 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 242 else 243 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 244 return 0; 245 } 246 247 248 /* -------------------------------------------------------*/ 249 /* Should check that shapes of vectors and matrices match */ 250 /* -------------------------------------------------------*/ 251 #include "pinclude/plapack.h" 252 253 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 256 Scalar *xg,*yg; 257 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 258 register Scalar x1,x2,x3,x4,x5; 259 int mbs = a->mbs, m = a->m, i, *idx,*ii; 260 int bs = a->bs,j,n,bs2 = bs*bs; 261 262 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 263 PetscMemzero(y,m*sizeof(Scalar)); 264 x = x; 265 idx = a->j; 266 v = a->a; 267 ii = a->i; 268 269 switch (bs) { 270 case 1: 271 for ( i=0; i<m; i++ ) { 272 n = ii[1] - ii[0]; ii++; 273 sum = 0.0; 274 while (n--) sum += *v++ * x[*idx++]; 275 y[i] = sum; 276 } 277 break; 278 case 2: 279 for ( i=0; i<mbs; i++ ) { 280 n = ii[1] - ii[0]; ii++; 281 sum1 = 0.0; sum2 = 0.0; 282 for ( j=0; j<n; j++ ) { 283 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 284 sum1 += v[0]*x1 + v[2]*x2; 285 sum2 += v[1]*x1 + v[3]*x2; 286 v += 4; 287 } 288 y[0] += sum1; y[1] += sum2; 289 y += 2; 290 } 291 break; 292 case 3: 293 for ( i=0; i<mbs; i++ ) { 294 n = ii[1] - ii[0]; ii++; 295 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 296 for ( j=0; j<n; j++ ) { 297 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 298 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 299 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 300 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 301 v += 9; 302 } 303 y[0] += sum1; y[1] += sum2; y[2] += sum3; 304 y += 3; 305 } 306 break; 307 case 4: 308 for ( i=0; i<mbs; i++ ) { 309 n = ii[1] - ii[0]; ii++; 310 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 311 for ( j=0; j<n; j++ ) { 312 xb = x + 4*(*idx++); 313 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 314 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 315 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 316 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 317 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 318 v += 16; 319 } 320 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 321 y += 4; 322 } 323 break; 324 case 5: 325 for ( i=0; i<mbs; i++ ) { 326 n = ii[1] - ii[0]; ii++; 327 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 328 for ( j=0; j<n; j++ ) { 329 xb = x + 5*(*idx++); 330 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 331 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 332 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 333 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 334 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 335 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 336 v += 25; 337 } 338 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 339 y += 5; 340 } 341 break; 342 /* block sizes larger then 5 by 5 are handled by BLAS */ 343 default: { 344 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 345 if (!a->mult_work) { 346 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 347 CHKPTRQ(a->mult_work); 348 } 349 work = a->mult_work; 350 for ( i=0; i<mbs; i++ ) { 351 n = ii[1] - ii[0]; ii++; 352 ncols = n*bs; 353 workt = work; 354 for ( j=0; j<n; j++ ) { 355 xb = x + bs*(*idx++); 356 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 357 workt += bs; 358 } 359 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 360 v += n*bs2; 361 y += bs; 362 } 363 } 364 } 365 PLogFlops(2*bs2*a->nz - m); 366 return 0; 367 } 368 369 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 372 *nz = a->bs*a->bs*a->nz; 373 *nza = a->maxnz; 374 *mem = (int)A->mem; 375 return 0; 376 } 377 378 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 379 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 380 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 381 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 382 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 383 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 384 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 385 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 386 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 387 388 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 389 { 390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 391 *m = a->m; *n = a->n; 392 return 0; 393 } 394 395 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 396 { 397 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 398 *m = 0; *n = a->m; 399 return 0; 400 } 401 402 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 403 { 404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 405 Scalar *v = a->a; 406 double sum = 0.0; 407 int i; 408 409 if (type == NORM_FROBENIUS) { 410 for (i=0; i<a->nz; i++ ) { 411 #if defined(PETSC_COMPLEX) 412 sum += real(conj(*v)*(*v)); v++; 413 #else 414 sum += (*v)*(*v); v++; 415 #endif 416 } 417 *norm = sqrt(sum); 418 } 419 else { 420 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 421 } 422 return 0; 423 } 424 425 /* 426 note: This can only work for identity for row and col. It would 427 be good to check this and otherwise generate an error. 428 */ 429 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 432 Mat outA; 433 int ierr; 434 435 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 436 437 outA = inA; 438 inA->factor = FACTOR_LU; 439 a->row = row; 440 a->col = col; 441 442 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 443 444 if (!a->diag) { 445 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 446 } 447 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 448 return 0; 449 } 450 451 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 452 { 453 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 454 int one = 1, totalnz = a->bs*a->bs*a->nz; 455 BLscal_( &totalnz, alpha, a->a, &one ); 456 PLogFlops(totalnz); 457 return 0; 458 } 459 460 int MatPrintHelp_SeqBAIJ(Mat A) 461 { 462 static int called = 0; 463 464 if (called) return 0; else called = 1; 465 return 0; 466 } 467 /* -------------------------------------------------------------------*/ 468 static struct _MatOps MatOps = {0, 469 0,0, 470 MatMult_SeqBAIJ,0, 471 0,0, 472 MatSolve_SeqBAIJ,0, 473 0,0, 474 MatLUFactor_SeqBAIJ,0, 475 0, 476 0, 477 MatGetInfo_SeqBAIJ,0, 478 0,0,MatNorm_SeqBAIJ, 479 0,0, 480 0, 481 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 482 MatGetReordering_SeqBAIJ, 483 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 484 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 485 MatILUFactorSymbolic_SeqBAIJ,0, 486 0,0,/*MatConvert_SeqBAIJ*/ 0, 487 0,0, 488 MatConvertSameType_SeqBAIJ,0,0, 489 MatILUFactor_SeqBAIJ,0,0, 490 0,0, 491 0,0, 492 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 493 0}; 494 495 /*@C 496 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 497 (the default parallel PETSc format). For good matrix assembly performance 498 the user should preallocate the matrix storage by setting the parameter nz 499 (or nzz). By setting these parameters accurately, performance can be 500 increased by more than a factor of 50. 501 502 Input Parameters: 503 . comm - MPI communicator, set to MPI_COMM_SELF 504 . bs - size of block 505 . m - number of rows 506 . n - number of columns 507 . nz - number of block nonzeros per block row (same for all rows) 508 . nzz - number of block nonzeros per block row or PETSC_NULL 509 (possibly different for each row) 510 511 Output Parameter: 512 . A - the matrix 513 514 Notes: 515 The BAIJ format, is fully compatible with standard Fortran 77 516 storage. That is, the stored row and column indices can begin at 517 either one (as in Fortran) or zero. See the users manual for details. 518 519 Specify the preallocated storage with either nz or nnz (not both). 520 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 521 allocation. For additional details, see the users manual chapter on 522 matrices and the file $(PETSC_DIR)/Performance. 523 524 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 525 @*/ 526 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 527 { 528 Mat B; 529 Mat_SeqBAIJ *b; 530 int i,len,ierr, flg,mbs = m/bs; 531 532 if (mbs*bs != m) 533 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 534 535 *A = 0; 536 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 537 PLogObjectCreate(B); 538 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 539 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 540 switch (bs) { 541 case 1: 542 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 543 break; 544 case 2: 545 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 546 break; 547 case 3: 548 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 549 break; 550 case 4: 551 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 552 break; 553 case 5: 554 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 555 break; 556 } 557 B->destroy = MatDestroy_SeqBAIJ; 558 B->view = MatView_SeqBAIJ; 559 B->factor = 0; 560 B->lupivotthreshold = 1.0; 561 b->row = 0; 562 b->col = 0; 563 b->reallocs = 0; 564 565 b->m = m; 566 b->mbs = mbs; 567 b->n = n; 568 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 569 if (nnz == PETSC_NULL) { 570 if (nz == PETSC_DEFAULT) nz = 5; 571 else if (nz <= 0) nz = 1; 572 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 573 nz = nz*mbs; 574 } 575 else { 576 nz = 0; 577 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 578 } 579 580 /* allocate the matrix space */ 581 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 582 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 583 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 584 b->j = (int *) (b->a + nz*bs*bs); 585 PetscMemzero(b->j,nz*sizeof(int)); 586 b->i = b->j + nz; 587 b->singlemalloc = 1; 588 589 b->i[0] = 0; 590 for (i=1; i<mbs+1; i++) { 591 b->i[i] = b->i[i-1] + b->imax[i-1]; 592 } 593 594 /* b->ilen will count nonzeros in each block row so far. */ 595 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 596 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 597 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 598 599 b->bs = bs; 600 b->mbs = mbs; 601 b->nz = 0; 602 b->maxnz = nz; 603 b->sorted = 0; 604 b->roworiented = 1; 605 b->nonew = 0; 606 b->diag = 0; 607 b->solve_work = 0; 608 b->mult_work = 0; 609 b->spptr = 0; 610 611 *A = B; 612 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 613 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 614 return 0; 615 } 616 617 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 618 { 619 Mat C; 620 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 621 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 622 623 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 624 625 *B = 0; 626 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 627 PLogObjectCreate(C); 628 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 629 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 630 C->destroy = MatDestroy_SeqBAIJ; 631 C->view = MatView_SeqBAIJ; 632 C->factor = A->factor; 633 c->row = 0; 634 c->col = 0; 635 C->assembled = PETSC_TRUE; 636 637 c->m = a->m; 638 c->n = a->n; 639 c->bs = a->bs; 640 c->mbs = a->mbs; 641 c->nbs = a->nbs; 642 643 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 644 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 645 for ( i=0; i<mbs; i++ ) { 646 c->imax[i] = a->imax[i]; 647 c->ilen[i] = a->ilen[i]; 648 } 649 650 /* allocate the matrix space */ 651 c->singlemalloc = 1; 652 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 653 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 654 c->j = (int *) (c->a + nz*bs*bs); 655 c->i = c->j + nz; 656 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 657 if (mbs > 0) { 658 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 659 if (cpvalues == COPY_VALUES) { 660 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 661 } 662 } 663 664 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 665 c->sorted = a->sorted; 666 c->roworiented = a->roworiented; 667 c->nonew = a->nonew; 668 669 if (a->diag) { 670 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 671 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 672 for ( i=0; i<mbs; i++ ) { 673 c->diag[i] = a->diag[i]; 674 } 675 } 676 else c->diag = 0; 677 c->nz = a->nz; 678 c->maxnz = a->maxnz; 679 c->solve_work = 0; 680 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 681 c->mult_work = 0; 682 *B = C; 683 return 0; 684 } 685 686 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A) 687 { 688 Mat_SeqBAIJ *a; 689 Mat B; 690 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 691 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 692 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 693 int *masked, nmask,tmp,ishift,bs2; 694 Scalar *aa; 695 PetscObject vobj = (PetscObject) bview; 696 MPI_Comm comm = vobj->comm; 697 698 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 699 bs2 = bs*bs; 700 701 MPI_Comm_size(comm,&size); 702 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 703 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 704 ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr); 705 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 706 M = header[1]; N = header[2]; nz = header[3]; 707 708 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 709 710 /* 711 This code adds extra rows to make sure the number of rows is 712 divisible by the blocksize 713 */ 714 mbs = M/bs; 715 extra_rows = bs - M + bs*(mbs); 716 if (extra_rows == bs) extra_rows = 0; 717 else mbs++; 718 if (extra_rows) { 719 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 720 } 721 722 /* read in row lengths */ 723 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 724 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 725 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 726 727 /* read in column indices */ 728 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 729 ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr); 730 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 731 732 /* loop over row lengths determining block row lengths */ 733 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 734 PetscMemzero(browlengths,mbs*sizeof(int)); 735 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 736 PetscMemzero(mask,mbs*sizeof(int)); 737 masked = mask + mbs; 738 rowcount = 0; nzcount = 0; 739 for ( i=0; i<mbs; i++ ) { 740 nmask = 0; 741 for ( j=0; j<bs; j++ ) { 742 kmax = rowlengths[rowcount]; 743 for ( k=0; k<kmax; k++ ) { 744 tmp = jj[nzcount++]/bs; 745 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 746 } 747 rowcount++; 748 } 749 browlengths[i] += nmask; 750 /* zero out the mask elements we set */ 751 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 752 } 753 754 /* create our matrix */ 755 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 756 CHKERRQ(ierr); 757 B = *A; 758 a = (Mat_SeqBAIJ *) B->data; 759 760 /* set matrix "i" values */ 761 a->i[0] = 0; 762 for ( i=1; i<= mbs; i++ ) { 763 a->i[i] = a->i[i-1] + browlengths[i-1]; 764 a->ilen[i-1] = browlengths[i-1]; 765 } 766 a->nz = 0; 767 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 768 769 /* read in nonzero values */ 770 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 771 ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr); 772 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 773 774 /* set "a" and "j" values into matrix */ 775 nzcount = 0; jcount = 0; 776 for ( i=0; i<mbs; i++ ) { 777 nzcountb = nzcount; 778 nmask = 0; 779 for ( j=0; j<bs; j++ ) { 780 kmax = rowlengths[i*bs+j]; 781 for ( k=0; k<kmax; k++ ) { 782 tmp = jj[nzcount++]/bs; 783 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 784 } 785 rowcount++; 786 } 787 /* sort the masked values */ 788 SYIsort(nmask,masked); 789 790 /* set "j" values into matrix */ 791 maskcount = 1; 792 for ( j=0; j<nmask; j++ ) { 793 a->j[jcount++] = masked[j]; 794 mask[masked[j]] = maskcount++; 795 } 796 /* set "a" values into matrix */ 797 ishift = bs2*a->i[i]; 798 for ( j=0; j<bs; j++ ) { 799 kmax = rowlengths[i*bs+j]; 800 for ( k=0; k<kmax; k++ ) { 801 tmp = jj[nzcountb]/bs ; 802 block = mask[tmp] - 1; 803 point = jj[nzcountb] - bs*tmp; 804 idx = ishift + bs2*block + j + bs*point; 805 a->a[idx] = aa[nzcountb++]; 806 } 807 } 808 /* zero out the mask elements we set */ 809 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 810 } 811 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 812 813 PetscFree(rowlengths); 814 PetscFree(browlengths); 815 PetscFree(aa); 816 PetscFree(jj); 817 PetscFree(mask); 818 819 B->assembled = PETSC_TRUE; 820 821 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 822 if (flg) { 823 Viewer viewer; 824 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 825 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr); 826 ierr = MatView(B,viewer); CHKERRQ(ierr); 827 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 828 } 829 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 830 if (flg) { 831 Viewer viewer; 832 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 833 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 834 ierr = MatView(B,viewer); CHKERRQ(ierr); 835 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 836 } 837 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 838 if (flg) { 839 Viewer viewer; 840 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 841 ierr = MatView(B,viewer); CHKERRQ(ierr); 842 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 843 } 844 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 845 if (flg) { 846 Viewer viewer; 847 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 848 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr); 849 ierr = MatView(B,viewer); CHKERRQ(ierr); 850 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 851 } 852 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 853 if (flg) { 854 Draw win; 855 ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr); 856 ierr = MatView(B,(Viewer)win); CHKERRQ(ierr); 857 ierr = DrawSyncFlush(win); CHKERRQ(ierr); 858 ierr = DrawDestroy(win); CHKERRQ(ierr); 859 } 860 return 0; 861 } 862 863 864 865