1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.7 1996/02/20 18:52:36 curfman Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr); 41 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 42 43 PetscFree(ia); PetscFree(ja); 44 return 0; 45 } 46 47 /* 48 Adds diagonal pointers to sparse matrix structure. 49 */ 50 51 int MatMarkDiag_SeqBAIJ(Mat A) 52 { 53 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 54 int i,j, *diag, m = a->mbs; 55 56 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 57 PLogObjectMemory(A,(m+1)*sizeof(int)); 58 for ( i=0; i<m; i++ ) { 59 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 60 if (a->j[j] == i) { 61 diag[i] = j; 62 break; 63 } 64 } 65 } 66 a->diag = diag; 67 return 0; 68 } 69 70 #include "draw.h" 71 #include "pinclude/pviewer.h" 72 #include "sysio.h" 73 74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 78 Scalar *aa; 79 80 ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr); 81 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 82 col_lens[0] = MAT_COOKIE; 83 col_lens[1] = a->m; 84 col_lens[2] = a->n; 85 col_lens[3] = a->nz*bs*bs; 86 87 /* store lengths of each row and write (including header) to file */ 88 count = 0; 89 for ( i=0; i<a->mbs; i++ ) { 90 for ( j=0; j<bs; j++ ) { 91 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 92 } 93 } 94 ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr); 95 PetscFree(col_lens); 96 97 /* store column indices (zero start index) */ 98 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 99 count = 0; 100 for ( i=0; i<a->mbs; i++ ) { 101 for ( j=0; j<bs; j++ ) { 102 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 103 for ( l=0; l<bs; l++ ) { 104 jj[count++] = bs*a->j[k] + l; 105 } 106 } 107 } 108 } 109 ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr); 110 PetscFree(jj); 111 112 /* store nonzero values */ 113 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 118 for ( l=0; l<bs; l++ ) { 119 aa[count++] = a->a[bs*bs*k + l*bs + j]; 120 } 121 } 122 } 123 } 124 ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr); 125 PetscFree(aa); 126 return 0; 127 } 128 129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 132 int ierr, i,j,format,bs = a->bs,k,l; 133 FILE *fd; 134 char *outputname; 135 136 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 137 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 138 ierr = ViewerFileGetFormat_Private(viewer,&format); 139 if (format == FILE_FORMAT_INFO) { 140 /* no need to print additional information */ ; 141 } 142 else if (format == FILE_FORMAT_MATLAB) { 143 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 144 } 145 else { 146 for ( i=0; i<a->mbs; i++ ) { 147 for ( j=0; j<bs; j++ ) { 148 fprintf(fd,"row %d:",i*bs+j); 149 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 150 for ( l=0; l<bs; l++ ) { 151 #if defined(PETSC_COMPLEX) 152 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 153 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 154 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 155 } 156 else { 157 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 158 } 159 #else 160 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 161 #endif 162 } 163 } 164 fprintf(fd,"\n"); 165 } 166 } 167 } 168 fflush(fd); 169 return 0; 170 } 171 172 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 173 { 174 Mat A = (Mat) obj; 175 PetscObject vobj = (PetscObject) viewer; 176 177 if (!viewer) { 178 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 179 } 180 if (vobj->cookie == VIEWER_COOKIE) { 181 if (vobj->type == MATLAB_VIEWER) { 182 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 183 } 184 else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){ 185 return MatView_SeqBAIJ_ASCII(A,viewer); 186 } 187 else if (vobj->type == BINARY_FILE_VIEWER) { 188 return MatView_SeqBAIJ_Binary(A,viewer); 189 } 190 } 191 else if (vobj->cookie == DRAW_COOKIE) { 192 if (vobj->type == NULLWINDOW) return 0; 193 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 194 } 195 return 0; 196 } 197 198 199 static int MatZeroEntries_SeqBAIJ(Mat A) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 202 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 203 return 0; 204 } 205 206 int MatDestroy_SeqBAIJ(PetscObject obj) 207 { 208 Mat A = (Mat) obj; 209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 210 211 #if defined(PETSC_LOG) 212 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 213 #endif 214 PetscFree(a->a); 215 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 216 if (a->diag) PetscFree(a->diag); 217 if (a->ilen) PetscFree(a->ilen); 218 if (a->imax) PetscFree(a->imax); 219 if (a->solve_work) PetscFree(a->solve_work); 220 if (a->mult_work) PetscFree(a->mult_work); 221 PetscFree(a); 222 PLogObjectDestroy(A); 223 PetscHeaderDestroy(A); 224 return 0; 225 } 226 227 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 if (op == ROW_ORIENTED) a->roworiented = 1; 231 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 232 else if (op == COLUMNS_SORTED) a->sorted = 1; 233 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 234 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 235 else if (op == ROWS_SORTED || 236 op == SYMMETRIC_MATRIX || 237 op == STRUCTURALLY_SYMMETRIC_MATRIX || 238 op == YES_NEW_DIAGONALS) 239 PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 240 else if (op == NO_NEW_DIAGONALS) 241 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 242 else 243 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 244 return 0; 245 } 246 247 248 /* -------------------------------------------------------*/ 249 /* Should check that shapes of vectors and matrices match */ 250 /* -------------------------------------------------------*/ 251 #include "pinclude/plapack.h" 252 253 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 256 Scalar *xg,*yg; 257 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 258 register Scalar x1,x2,x3,x4,x5; 259 int mbs = a->mbs, m = a->m, i, *idx,*ii; 260 int bs = a->bs,j,n,bs2 = bs*bs; 261 262 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 263 PetscMemzero(y,m*sizeof(Scalar)); 264 x = x; 265 idx = a->j; 266 v = a->a; 267 ii = a->i; 268 269 switch (bs) { 270 case 1: 271 for ( i=0; i<m; i++ ) { 272 n = ii[1] - ii[0]; ii++; 273 sum = 0.0; 274 while (n--) sum += *v++ * x[*idx++]; 275 y[i] = sum; 276 } 277 break; 278 case 2: 279 for ( i=0; i<mbs; i++ ) { 280 n = ii[1] - ii[0]; ii++; 281 sum1 = 0.0; sum2 = 0.0; 282 for ( j=0; j<n; j++ ) { 283 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 284 sum1 += v[0]*x1 + v[2]*x2; 285 sum2 += v[1]*x1 + v[3]*x2; 286 v += 4; 287 } 288 y[0] += sum1; y[1] += sum2; 289 y += 2; 290 } 291 break; 292 case 3: 293 for ( i=0; i<mbs; i++ ) { 294 n = ii[1] - ii[0]; ii++; 295 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 296 for ( j=0; j<n; j++ ) { 297 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 298 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 299 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 300 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 301 v += 9; 302 } 303 y[0] += sum1; y[1] += sum2; y[2] += sum3; 304 y += 3; 305 } 306 break; 307 case 4: 308 for ( i=0; i<mbs; i++ ) { 309 n = ii[1] - ii[0]; ii++; 310 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 311 for ( j=0; j<n; j++ ) { 312 xb = x + 4*(*idx++); 313 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 314 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 315 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 316 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 317 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 318 v += 16; 319 } 320 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 321 y += 4; 322 } 323 break; 324 case 5: 325 for ( i=0; i<mbs; i++ ) { 326 n = ii[1] - ii[0]; ii++; 327 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 328 for ( j=0; j<n; j++ ) { 329 xb = x + 5*(*idx++); 330 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 331 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 332 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 333 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 334 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 335 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 336 v += 25; 337 } 338 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 339 y += 5; 340 } 341 break; 342 /* block sizes larger then 5 by 5 are handled by BLAS */ 343 default: { 344 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 345 if (!a->mult_work) { 346 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 347 CHKPTRQ(a->mult_work); 348 } 349 work = a->mult_work; 350 for ( i=0; i<mbs; i++ ) { 351 n = ii[1] - ii[0]; ii++; 352 ncols = n*bs; 353 workt = work; 354 for ( j=0; j<n; j++ ) { 355 xb = x + bs*(*idx++); 356 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 357 workt += bs; 358 } 359 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 360 v += n*bs2; 361 y += bs; 362 } 363 } 364 } 365 PLogFlops(2*bs2*a->nz - m); 366 return 0; 367 } 368 369 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 372 *nz = a->bs*a->bs*a->nz; 373 *nza = a->maxnz; 374 *mem = (int)A->mem; 375 return 0; 376 } 377 378 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 379 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 380 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 381 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 382 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 383 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 384 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 385 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 386 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 387 388 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 389 { 390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 391 *m = a->m; *n = a->n; 392 return 0; 393 } 394 395 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 396 { 397 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 398 *m = 0; *n = a->m; 399 return 0; 400 } 401 402 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 403 { 404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 405 Scalar *v = a->a; 406 double sum = 0.0; 407 int i; 408 409 if (type == NORM_FROBENIUS) { 410 for (i=0; i<a->nz; i++ ) { 411 #if defined(PETSC_COMPLEX) 412 sum += real(conj(*v)*(*v)); v++; 413 #else 414 sum += (*v)*(*v); v++; 415 #endif 416 } 417 *norm = sqrt(sum); 418 } 419 else { 420 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 421 } 422 return 0; 423 } 424 425 /* 426 note: This can only work for identity for row and col. It would 427 be good to check this and otherwise generate an error. 428 */ 429 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 432 Mat outA; 433 int ierr; 434 435 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 436 437 outA = inA; 438 inA->factor = FACTOR_LU; 439 a->row = row; 440 a->col = col; 441 442 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 443 444 if (!a->diag) { 445 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 446 } 447 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 448 return 0; 449 } 450 451 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 452 { 453 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 454 int one = 1, totalnz = a->bs*a->bs*a->nz; 455 BLscal_( &totalnz, alpha, a->a, &one ); 456 PLogFlops(totalnz); 457 return 0; 458 } 459 460 int MatPrintHelp_SeqBAIJ(Mat A) 461 { 462 static int called = 0; 463 464 if (called) return 0; else called = 1; 465 return 0; 466 } 467 /* -------------------------------------------------------------------*/ 468 static struct _MatOps MatOps = {0, 469 0,0, 470 MatMult_SeqBAIJ,0, 471 0,0, 472 MatSolve_SeqBAIJ,0, 473 0,0, 474 MatLUFactor_SeqBAIJ,0, 475 0, 476 0, 477 MatGetInfo_SeqBAIJ,0, 478 0,0,MatNorm_SeqBAIJ, 479 0,0, 480 0, 481 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 482 MatGetReordering_SeqBAIJ, 483 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 484 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 485 MatILUFactorSymbolic_SeqBAIJ,0, 486 0,0,/*MatConvert_SeqBAIJ*/ 0, 487 0,0, 488 MatConvertSameType_SeqBAIJ,0,0, 489 MatILUFactor_SeqBAIJ,0,0, 490 0,0, 491 0,0, 492 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 493 0}; 494 495 /*@C 496 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 497 (the default parallel PETSc format). For good matrix assembly performance 498 the user should preallocate the matrix storage by setting the parameter nz 499 (or nzz). By setting these parameters accurately, performance can be 500 increased by more than a factor of 50. 501 502 Input Parameters: 503 . comm - MPI communicator, set to MPI_COMM_SELF 504 . bs - size of block 505 . m - number of rows 506 . n - number of columns 507 . nz - number of block nonzeros per block row (same for all rows) 508 . nzz - number of block nonzeros per block row or PETSC_NULL 509 (possibly different for each row) 510 511 Output Parameter: 512 . A - the matrix 513 514 Notes: 515 The BAIJ format, is fully compatible with standard Fortran 77 516 storage. That is, the stored row and column indices can begin at 517 either one (as in Fortran) or zero. See the users manual for details. 518 519 Specify the preallocated storage with either nz or nnz (not both). 520 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 521 allocation. For additional details, see the users manual chapter on 522 matrices and the file $(PETSC_DIR)/Performance. 523 524 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 525 @*/ 526 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 527 { 528 Mat B; 529 Mat_SeqBAIJ *b; 530 int i,len,ierr, flg,mbs = m/bs; 531 532 if (mbs*bs != m) 533 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 534 535 *A = 0; 536 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 537 PLogObjectCreate(B); 538 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 539 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 540 switch (bs) { 541 case 1: 542 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 543 break; 544 case 2: 545 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 546 case 3: 547 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 548 break; 549 } 550 B->destroy = MatDestroy_SeqBAIJ; 551 B->view = MatView_SeqBAIJ; 552 B->factor = 0; 553 B->lupivotthreshold = 1.0; 554 b->row = 0; 555 b->col = 0; 556 b->reallocs = 0; 557 558 b->m = m; 559 b->mbs = mbs; 560 b->n = n; 561 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 562 if (nnz == PETSC_NULL) { 563 if (nz == PETSC_DEFAULT) nz = 5; 564 else if (nz <= 0) nz = 1; 565 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 566 nz = nz*mbs; 567 } 568 else { 569 nz = 0; 570 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 571 } 572 573 /* allocate the matrix space */ 574 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 575 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 576 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 577 b->j = (int *) (b->a + nz*bs*bs); 578 PetscMemzero(b->j,nz*sizeof(int)); 579 b->i = b->j + nz; 580 b->singlemalloc = 1; 581 582 b->i[0] = 0; 583 for (i=1; i<mbs+1; i++) { 584 b->i[i] = b->i[i-1] + b->imax[i-1]; 585 } 586 587 /* b->ilen will count nonzeros in each block row so far. */ 588 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 589 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 590 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 591 592 b->bs = bs; 593 b->mbs = mbs; 594 b->nz = 0; 595 b->maxnz = nz; 596 b->sorted = 0; 597 b->roworiented = 1; 598 b->nonew = 0; 599 b->diag = 0; 600 b->solve_work = 0; 601 b->mult_work = 0; 602 b->spptr = 0; 603 604 *A = B; 605 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 606 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 607 return 0; 608 } 609 610 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 611 { 612 Mat C; 613 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 614 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 615 616 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 617 618 *B = 0; 619 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 620 PLogObjectCreate(C); 621 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 622 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 623 C->destroy = MatDestroy_SeqBAIJ; 624 C->view = MatView_SeqBAIJ; 625 C->factor = A->factor; 626 c->row = 0; 627 c->col = 0; 628 C->assembled = PETSC_TRUE; 629 630 c->m = a->m; 631 c->n = a->n; 632 c->bs = a->bs; 633 c->mbs = a->mbs; 634 c->nbs = a->nbs; 635 636 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 637 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 638 for ( i=0; i<mbs; i++ ) { 639 c->imax[i] = a->imax[i]; 640 c->ilen[i] = a->ilen[i]; 641 } 642 643 /* allocate the matrix space */ 644 c->singlemalloc = 1; 645 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 646 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 647 c->j = (int *) (c->a + nz*bs*bs); 648 c->i = c->j + nz; 649 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 650 if (mbs > 0) { 651 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 652 if (cpvalues == COPY_VALUES) { 653 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 654 } 655 } 656 657 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 658 c->sorted = a->sorted; 659 c->roworiented = a->roworiented; 660 c->nonew = a->nonew; 661 662 if (a->diag) { 663 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 664 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 665 for ( i=0; i<mbs; i++ ) { 666 c->diag[i] = a->diag[i]; 667 } 668 } 669 else c->diag = 0; 670 c->nz = a->nz; 671 c->maxnz = a->maxnz; 672 c->solve_work = 0; 673 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 674 c->mult_work = 0; 675 *B = C; 676 return 0; 677 } 678 679 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A) 680 { 681 Mat_SeqBAIJ *a; 682 Mat B; 683 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 684 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 685 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 686 int *masked, nmask,tmp,ishift,bs2; 687 Scalar *aa; 688 PetscObject vobj = (PetscObject) bview; 689 MPI_Comm comm = vobj->comm; 690 691 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 692 bs2 = bs*bs; 693 694 MPI_Comm_size(comm,&size); 695 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 696 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 697 ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr); 698 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 699 M = header[1]; N = header[2]; nz = header[3]; 700 701 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 702 703 /* 704 This code adds extra rows to make sure the number of rows is 705 divisible by the blocksize 706 */ 707 mbs = M/bs; 708 extra_rows = bs - M + bs*(mbs); 709 if (extra_rows == bs) extra_rows = 0; 710 else mbs++; 711 if (extra_rows) { 712 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 713 } 714 715 /* read in row lengths */ 716 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 717 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 718 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 719 720 /* read in column indices */ 721 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 722 ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr); 723 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 724 725 /* loop over row lengths determining block row lengths */ 726 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 727 PetscMemzero(browlengths,mbs*sizeof(int)); 728 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 729 PetscMemzero(mask,mbs*sizeof(int)); 730 masked = mask + mbs; 731 rowcount = 0; nzcount = 0; 732 for ( i=0; i<mbs; i++ ) { 733 nmask = 0; 734 for ( j=0; j<bs; j++ ) { 735 kmax = rowlengths[rowcount]; 736 for ( k=0; k<kmax; k++ ) { 737 tmp = jj[nzcount++]/bs; 738 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 739 } 740 rowcount++; 741 } 742 browlengths[i] += nmask; 743 /* zero out the mask elements we set */ 744 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 745 } 746 747 /* create our matrix */ 748 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 749 CHKERRQ(ierr); 750 B = *A; 751 a = (Mat_SeqBAIJ *) B->data; 752 753 /* set matrix "i" values */ 754 a->i[0] = 0; 755 for ( i=1; i<= mbs; i++ ) { 756 a->i[i] = a->i[i-1] + browlengths[i-1]; 757 a->ilen[i-1] = browlengths[i-1]; 758 } 759 a->nz = 0; 760 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 761 762 /* read in nonzero values */ 763 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 764 ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr); 765 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 766 767 /* set "a" and "j" values into matrix */ 768 nzcount = 0; jcount = 0; 769 for ( i=0; i<mbs; i++ ) { 770 nzcountb = nzcount; 771 nmask = 0; 772 for ( j=0; j<bs; j++ ) { 773 kmax = rowlengths[i*bs+j]; 774 for ( k=0; k<kmax; k++ ) { 775 tmp = jj[nzcount++]/bs; 776 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 777 } 778 rowcount++; 779 } 780 /* sort the masked values */ 781 SYIsort(nmask,masked); 782 783 /* set "j" values into matrix */ 784 maskcount = 1; 785 for ( j=0; j<nmask; j++ ) { 786 a->j[jcount++] = masked[j]; 787 mask[masked[j]] = maskcount++; 788 } 789 /* set "a" values into matrix */ 790 ishift = bs2*a->i[i]; 791 for ( j=0; j<bs; j++ ) { 792 kmax = rowlengths[i*bs+j]; 793 for ( k=0; k<kmax; k++ ) { 794 tmp = jj[nzcountb]/bs ; 795 block = mask[tmp] - 1; 796 point = jj[nzcountb] - bs*tmp; 797 idx = ishift + bs2*block + j + bs*point; 798 a->a[idx] = aa[nzcountb++]; 799 } 800 } 801 /* zero out the mask elements we set */ 802 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 803 } 804 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 805 806 PetscFree(rowlengths); 807 PetscFree(browlengths); 808 PetscFree(aa); 809 PetscFree(jj); 810 PetscFree(mask); 811 812 B->assembled = PETSC_TRUE; 813 814 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 815 if (flg) { 816 Viewer viewer; 817 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 818 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr); 819 ierr = MatView(B,viewer); CHKERRQ(ierr); 820 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 821 } 822 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 823 if (flg) { 824 Viewer viewer; 825 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 826 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 827 ierr = MatView(B,viewer); CHKERRQ(ierr); 828 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 829 } 830 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 831 if (flg) { 832 Viewer viewer; 833 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 834 ierr = MatView(B,viewer); CHKERRQ(ierr); 835 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 836 } 837 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 838 if (flg) { 839 Viewer viewer; 840 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 841 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr); 842 ierr = MatView(B,viewer); CHKERRQ(ierr); 843 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 844 } 845 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 846 if (flg) { 847 Draw win; 848 ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr); 849 ierr = MatView(B,(Viewer)win); CHKERRQ(ierr); 850 ierr = DrawSyncFlush(win); CHKERRQ(ierr); 851 ierr = DrawDestroy(win); CHKERRQ(ierr); 852 } 853 return 0; 854 } 855 856 857 858