1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.4 1996/02/17 05:41:27 bsmith Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr); 41 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 42 43 PetscFree(ia); PetscFree(ja); 44 return 0; 45 } 46 47 /* 48 Adds diagonal pointers to sparse matrix structure. 49 */ 50 51 int MatMarkDiag_SeqBAIJ(Mat A) 52 { 53 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 54 int i,j, *diag, m = a->mbs; 55 56 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 57 PLogObjectMemory(A,(m+1)*sizeof(int)); 58 for ( i=0; i<m; i++ ) { 59 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 60 if (a->j[j] == i) { 61 diag[i] = j; 62 break; 63 } 64 } 65 } 66 a->diag = diag; 67 return 0; 68 } 69 70 #include "draw.h" 71 #include "pinclude/pviewer.h" 72 #include "sysio.h" 73 74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 78 Scalar *aa; 79 80 ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr); 81 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 82 col_lens[0] = MAT_COOKIE; 83 col_lens[1] = a->m; 84 col_lens[2] = a->n; 85 col_lens[3] = a->nz*bs*bs; 86 87 /* store lengths of each row and write (including header) to file */ 88 count = 0; 89 for ( i=0; i<a->mbs; i++ ) { 90 for ( j=0; j<bs; j++ ) { 91 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 92 } 93 } 94 ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr); 95 PetscFree(col_lens); 96 97 /* store column indices (zero start index) */ 98 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 99 count = 0; 100 for ( i=0; i<a->mbs; i++ ) { 101 for ( j=0; j<bs; j++ ) { 102 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 103 for ( l=0; l<bs; l++ ) { 104 jj[count++] = bs*a->j[k] + l; 105 } 106 } 107 } 108 } 109 ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr); 110 PetscFree(jj); 111 112 /* store nonzero values */ 113 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 118 for ( l=0; l<bs; l++ ) { 119 aa[count++] = a->a[bs*bs*k + l*bs + j]; 120 } 121 } 122 } 123 } 124 ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr); 125 PetscFree(aa); 126 return 0; 127 } 128 129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 132 int ierr, i,j,format,bs = a->bs,k,l; 133 FILE *fd; 134 char *outputname; 135 136 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 137 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 138 ierr = ViewerFileGetFormat_Private(viewer,&format); 139 if (format == FILE_FORMAT_INFO) { 140 /* no need to print additional information */ ; 141 } 142 else if (format == FILE_FORMAT_MATLAB) { 143 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 144 } 145 else { 146 for ( i=0; i<a->mbs; i++ ) { 147 for ( j=0; j<bs; j++ ) { 148 fprintf(fd,"row %d:",i*bs+j); 149 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 150 for ( l=0; l<bs; l++ ) { 151 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 152 } 153 } 154 fprintf(fd,"\n"); 155 } 156 } 157 } 158 fflush(fd); 159 return 0; 160 } 161 162 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 163 { 164 Mat A = (Mat) obj; 165 PetscObject vobj = (PetscObject) viewer; 166 167 if (!viewer) { 168 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 169 } 170 if (vobj->cookie == VIEWER_COOKIE) { 171 if (vobj->type == MATLAB_VIEWER) { 172 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 173 } 174 else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){ 175 return MatView_SeqBAIJ_ASCII(A,viewer); 176 } 177 else if (vobj->type == BINARY_FILE_VIEWER) { 178 return MatView_SeqBAIJ_Binary(A,viewer); 179 } 180 } 181 else if (vobj->cookie == DRAW_COOKIE) { 182 if (vobj->type == NULLWINDOW) return 0; 183 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 184 } 185 return 0; 186 } 187 188 189 static int MatZeroEntries_SeqBAIJ(Mat A) 190 { 191 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 192 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 193 return 0; 194 } 195 196 int MatDestroy_SeqBAIJ(PetscObject obj) 197 { 198 Mat A = (Mat) obj; 199 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 200 201 #if defined(PETSC_LOG) 202 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 203 #endif 204 PetscFree(a->a); 205 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 206 if (a->diag) PetscFree(a->diag); 207 if (a->ilen) PetscFree(a->ilen); 208 if (a->imax) PetscFree(a->imax); 209 if (a->solve_work) PetscFree(a->solve_work); 210 if (a->mult_work) PetscFree(a->mult_work); 211 PetscFree(a); 212 PLogObjectDestroy(A); 213 PetscHeaderDestroy(A); 214 return 0; 215 } 216 217 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 220 if (op == ROW_ORIENTED) a->roworiented = 1; 221 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 222 else if (op == COLUMNS_SORTED) a->sorted = 1; 223 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 224 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 225 else if (op == ROWS_SORTED || 226 op == SYMMETRIC_MATRIX || 227 op == STRUCTURALLY_SYMMETRIC_MATRIX || 228 op == YES_NEW_DIAGONALS) 229 PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 230 else if (op == NO_NEW_DIAGONALS) 231 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 232 else 233 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 234 return 0; 235 } 236 237 238 /* -------------------------------------------------------*/ 239 /* Should check that shapes of vectors and matrices match */ 240 /* -------------------------------------------------------*/ 241 #include "pinclude/plapack.h" 242 243 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 244 { 245 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 246 Scalar *xg,*yg; 247 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 248 register Scalar x1,x2,x3,x4,x5; 249 int mbs = a->mbs, m = a->m, i, *idx,*ii; 250 int bs = a->bs,j,n,bs2 = bs*bs; 251 252 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 253 PetscMemzero(y,m*sizeof(Scalar)); 254 x = x; 255 idx = a->j; 256 v = a->a; 257 ii = a->i; 258 259 switch (bs) { 260 case 1: 261 for ( i=0; i<m; i++ ) { 262 n = ii[1] - ii[0]; ii++; 263 sum = 0.0; 264 while (n--) sum += *v++ * x[*idx++]; 265 y[i] = sum; 266 } 267 break; 268 case 2: 269 for ( i=0; i<mbs; i++ ) { 270 n = ii[1] - ii[0]; ii++; 271 sum1 = 0.0; sum2 = 0.0; 272 for ( j=0; j<n; j++ ) { 273 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 274 sum1 += v[0]*x1 + v[2]*x2; 275 sum2 += v[1]*x1 + v[3]*x2; 276 v += 4; 277 } 278 y[0] += sum1; y[1] += sum2; 279 y += 2; 280 } 281 break; 282 case 3: 283 for ( i=0; i<mbs; i++ ) { 284 n = ii[1] - ii[0]; ii++; 285 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 286 for ( j=0; j<n; j++ ) { 287 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 288 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 289 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 290 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 291 v += 9; 292 } 293 y[0] += sum1; y[1] += sum2; y[2] += sum3; 294 y += 3; 295 } 296 break; 297 case 4: 298 for ( i=0; i<mbs; i++ ) { 299 n = ii[1] - ii[0]; ii++; 300 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 301 for ( j=0; j<n; j++ ) { 302 xb = x + 4*(*idx++); 303 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 304 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 305 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 306 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 307 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 308 v += 16; 309 } 310 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 311 y += 4; 312 } 313 break; 314 case 5: 315 for ( i=0; i<mbs; i++ ) { 316 n = ii[1] - ii[0]; ii++; 317 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 318 for ( j=0; j<n; j++ ) { 319 xb = x + 5*(*idx++); 320 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 321 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 322 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 323 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 324 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 325 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 326 v += 25; 327 } 328 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 329 y += 5; 330 } 331 break; 332 /* block sizes larger then 5 by 5 are handled by BLAS */ 333 default: { 334 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 335 if (!a->mult_work) { 336 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 337 CHKPTRQ(a->mult_work); 338 } 339 work = a->mult_work; 340 for ( i=0; i<mbs; i++ ) { 341 n = ii[1] - ii[0]; ii++; 342 ncols = n*bs; 343 workt = work; 344 for ( j=0; j<n; j++ ) { 345 xb = x + bs*(*idx++); 346 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 347 workt += bs; 348 } 349 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 350 v += n*bs2; 351 y += bs; 352 } 353 } 354 } 355 PLogFlops(2*bs2*a->nz - m); 356 return 0; 357 } 358 359 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 360 { 361 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 362 *nz = a->bs*a->bs*a->nz; 363 *nza = a->maxnz; 364 *mem = (int)A->mem; 365 return 0; 366 } 367 368 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 369 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 370 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 371 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 372 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 373 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 374 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 375 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 376 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 377 378 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 379 { 380 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 381 *m = a->m; *n = a->n; 382 return 0; 383 } 384 385 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 386 { 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 388 *m = 0; *n = a->m; 389 return 0; 390 } 391 392 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 393 { 394 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 395 Scalar *v = a->a; 396 double sum = 0.0; 397 int i; 398 399 if (type == NORM_FROBENIUS) { 400 for (i=0; i<a->nz; i++ ) { 401 #if defined(PETSC_COMPLEX) 402 sum += real(conj(*v)*(*v)); v++; 403 #else 404 sum += (*v)*(*v); v++; 405 #endif 406 } 407 *norm = sqrt(sum); 408 } 409 else { 410 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 411 } 412 return 0; 413 } 414 415 /* 416 note: This can only work for identity for row and col. It would 417 be good to check this and otherwise generate an error. 418 */ 419 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 420 { 421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 422 Mat outA; 423 int ierr; 424 425 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 426 427 outA = inA; 428 inA->factor = FACTOR_LU; 429 a->row = row; 430 a->col = col; 431 432 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 433 434 if (!a->diag) { 435 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 436 } 437 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 438 return 0; 439 } 440 441 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 442 { 443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 444 int one = 1, totalnz = a->bs*a->bs*a->nz; 445 BLscal_( &totalnz, alpha, a->a, &one ); 446 PLogFlops(totalnz); 447 return 0; 448 } 449 450 int MatPrintHelp_SeqBAIJ(Mat A) 451 { 452 static int called = 0; 453 454 if (called) return 0; else called = 1; 455 return 0; 456 } 457 /* -------------------------------------------------------------------*/ 458 static struct _MatOps MatOps = {0, 459 0,0, 460 MatMult_SeqBAIJ,0, 461 0,0, 462 MatSolve_SeqBAIJ,0, 463 0,0, 464 MatLUFactor_SeqBAIJ,0, 465 0, 466 0, 467 MatGetInfo_SeqBAIJ,0, 468 0,0,MatNorm_SeqBAIJ, 469 0,0, 470 0, 471 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 472 MatGetReordering_SeqBAIJ, 473 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 474 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 475 MatILUFactorSymbolic_SeqBAIJ,0, 476 0,0,/*MatConvert_SeqBAIJ*/ 0, 477 0,0, 478 MatConvertSameType_SeqBAIJ,0,0, 479 MatILUFactor_SeqBAIJ,0,0, 480 0,0, 481 0,0, 482 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 483 0}; 484 485 /*@C 486 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 487 (the default parallel PETSc format). For good matrix assembly performance 488 the user should preallocate the matrix storage by setting the parameter nz 489 (or nzz). By setting these parameters accurately, performance can be 490 increased by more than a factor of 50. 491 492 Input Parameters: 493 . comm - MPI communicator, set to MPI_COMM_SELF 494 . bs - size of block 495 . m - number of rows 496 . n - number of columns 497 . nz - number of block nonzeros per block row (same for all rows) 498 . nzz - number of block nonzeros per block row or PETSC_NULL 499 (possibly different for each row) 500 501 Output Parameter: 502 . A - the matrix 503 504 Notes: 505 The BAIJ format, is fully compatible with standard Fortran 77 506 storage. That is, the stored row and column indices can begin at 507 either one (as in Fortran) or zero. See the users manual for details. 508 509 Specify the preallocated storage with either nz or nnz (not both). 510 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 511 allocation. For additional details, see the users manual chapter on 512 matrices and the file $(PETSC_DIR)/Performance. 513 514 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 515 @*/ 516 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 517 { 518 Mat B; 519 Mat_SeqBAIJ *b; 520 int i,len,ierr, flg,mbs = m/bs; 521 522 if (mbs*bs != m) 523 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 524 525 *A = 0; 526 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 527 PLogObjectCreate(B); 528 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 529 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 530 switch (bs) { 531 case 1: 532 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 533 break; 534 } 535 B->destroy = MatDestroy_SeqBAIJ; 536 B->view = MatView_SeqBAIJ; 537 B->factor = 0; 538 B->lupivotthreshold = 1.0; 539 b->row = 0; 540 b->col = 0; 541 b->reallocs = 0; 542 543 b->m = m; 544 b->mbs = mbs; 545 b->n = n; 546 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 547 if (nnz == PETSC_NULL) { 548 if (nz == PETSC_DEFAULT) nz = 5; 549 else if (nz <= 0) nz = 1; 550 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 551 nz = nz*mbs; 552 } 553 else { 554 nz = 0; 555 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 556 } 557 558 /* allocate the matrix space */ 559 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 560 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 561 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 562 b->j = (int *) (b->a + nz*bs*bs); 563 PetscMemzero(b->j,nz*sizeof(int)); 564 b->i = b->j + nz; 565 b->singlemalloc = 1; 566 567 b->i[0] = 0; 568 for (i=1; i<mbs+1; i++) { 569 b->i[i] = b->i[i-1] + b->imax[i-1]; 570 } 571 572 /* b->ilen will count nonzeros in each block row so far. */ 573 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 574 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 575 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 576 577 b->bs = bs; 578 b->mbs = mbs; 579 b->nz = 0; 580 b->maxnz = nz; 581 b->sorted = 0; 582 b->roworiented = 1; 583 b->nonew = 0; 584 b->diag = 0; 585 b->solve_work = 0; 586 b->mult_work = 0; 587 b->spptr = 0; 588 589 *A = B; 590 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 591 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 592 return 0; 593 } 594 595 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 596 { 597 Mat C; 598 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 599 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 600 601 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 602 603 *B = 0; 604 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 605 PLogObjectCreate(C); 606 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 607 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 608 C->destroy = MatDestroy_SeqBAIJ; 609 C->view = MatView_SeqBAIJ; 610 C->factor = A->factor; 611 c->row = 0; 612 c->col = 0; 613 C->assembled = PETSC_TRUE; 614 615 c->m = a->m; 616 c->n = a->n; 617 c->bs = a->bs; 618 c->mbs = a->mbs; 619 c->nbs = a->nbs; 620 621 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 622 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 623 for ( i=0; i<mbs; i++ ) { 624 c->imax[i] = a->imax[i]; 625 c->ilen[i] = a->ilen[i]; 626 } 627 628 /* allocate the matrix space */ 629 c->singlemalloc = 1; 630 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 631 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 632 c->j = (int *) (c->a + nz*bs*bs); 633 c->i = c->j + nz; 634 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 635 if (mbs > 0) { 636 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 637 if (cpvalues == COPY_VALUES) { 638 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 639 } 640 } 641 642 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 643 c->sorted = a->sorted; 644 c->roworiented = a->roworiented; 645 c->nonew = a->nonew; 646 647 if (a->diag) { 648 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 649 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 650 for ( i=0; i<mbs; i++ ) { 651 c->diag[i] = a->diag[i]; 652 } 653 } 654 else c->diag = 0; 655 c->nz = a->nz; 656 c->maxnz = a->maxnz; 657 c->solve_work = 0; 658 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 659 c->mult_work = 0; 660 *B = C; 661 return 0; 662 } 663 664 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A) 665 { 666 Mat_SeqBAIJ *a; 667 Mat B; 668 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 669 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 670 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 671 int *masked, nmask,tmp,ishift,bs2; 672 Scalar *aa; 673 PetscObject vobj = (PetscObject) bview; 674 MPI_Comm comm = vobj->comm; 675 676 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 677 bs2 = bs*bs; 678 679 MPI_Comm_size(comm,&size); 680 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 681 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 682 ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr); 683 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 684 M = header[1]; N = header[2]; nz = header[3]; 685 686 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 687 688 /* 689 This code adds extra rows to make sure the number of rows is 690 divisible by the blocksize 691 */ 692 mbs = M/bs; 693 extra_rows = bs - M + bs*(mbs); 694 if (extra_rows == bs) extra_rows = 0; 695 else mbs++; 696 if (extra_rows) { 697 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 698 } 699 700 /* read in row lengths */ 701 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 702 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 703 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 704 705 /* read in column indices */ 706 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 707 ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr); 708 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 709 710 /* loop over row lengths determining block row lengths */ 711 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 712 PetscMemzero(browlengths,mbs*sizeof(int)); 713 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 714 PetscMemzero(mask,mbs*sizeof(int)); 715 masked = mask + mbs; 716 rowcount = 0; nzcount = 0; 717 for ( i=0; i<mbs; i++ ) { 718 nmask = 0; 719 for ( j=0; j<bs; j++ ) { 720 kmax = rowlengths[rowcount]; 721 for ( k=0; k<kmax; k++ ) { 722 tmp = jj[nzcount++]/bs; 723 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 724 } 725 rowcount++; 726 } 727 browlengths[i] += nmask; 728 /* zero out the mask elements we set */ 729 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 730 } 731 732 /* create our matrix */ 733 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 734 CHKERRQ(ierr); 735 B = *A; 736 a = (Mat_SeqBAIJ *) B->data; 737 738 /* set matrix "i" values */ 739 a->i[0] = 0; 740 for ( i=1; i<= mbs; i++ ) { 741 a->i[i] = a->i[i-1] + browlengths[i-1]; 742 a->ilen[i-1] = browlengths[i-1]; 743 } 744 a->nz = 0; 745 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 746 747 /* read in nonzero values */ 748 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 749 ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr); 750 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 751 752 /* set "a" and "j" values into matrix */ 753 nzcount = 0; jcount = 0; 754 for ( i=0; i<mbs; i++ ) { 755 nzcountb = nzcount; 756 nmask = 0; 757 for ( j=0; j<bs; j++ ) { 758 kmax = rowlengths[i*bs+j]; 759 for ( k=0; k<kmax; k++ ) { 760 tmp = jj[nzcount++]/bs; 761 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 762 } 763 rowcount++; 764 } 765 /* sort the masked values */ 766 SYIsort(nmask,masked); 767 768 /* set "j" values into matrix */ 769 maskcount = 1; 770 for ( j=0; j<nmask; j++ ) { 771 a->j[jcount++] = masked[j]; 772 mask[masked[j]] = maskcount++; 773 } 774 /* set "a" values into matrix */ 775 ishift = bs2*a->i[i]; 776 for ( j=0; j<bs; j++ ) { 777 kmax = rowlengths[i*bs+j]; 778 for ( k=0; k<kmax; k++ ) { 779 tmp = jj[nzcountb]/bs ; 780 block = mask[tmp] - 1; 781 point = jj[nzcountb] - bs*tmp; 782 idx = ishift + bs2*block + j + bs*point; 783 a->a[idx] = aa[nzcountb++]; 784 } 785 } 786 /* zero out the mask elements we set */ 787 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 788 } 789 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 790 791 PetscFree(rowlengths); 792 PetscFree(browlengths); 793 PetscFree(aa); 794 PetscFree(jj); 795 PetscFree(mask); 796 797 B->assembled = PETSC_TRUE; 798 799 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 800 if (flg) { 801 Viewer viewer; 802 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 803 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr); 804 ierr = MatView(B,viewer); CHKERRQ(ierr); 805 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 806 } 807 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 808 if (flg) { 809 Viewer viewer; 810 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 811 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 812 ierr = MatView(B,viewer); CHKERRQ(ierr); 813 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 814 } 815 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 816 if (flg) { 817 Viewer viewer; 818 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 819 ierr = MatView(B,viewer); CHKERRQ(ierr); 820 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 821 } 822 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 823 if (flg) { 824 Viewer viewer; 825 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 826 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr); 827 ierr = MatView(B,viewer); CHKERRQ(ierr); 828 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 829 } 830 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 831 if (flg) { 832 Draw win; 833 ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr); 834 ierr = MatView(B,(Viewer)win); CHKERRQ(ierr); 835 ierr = DrawSyncFlush(win); CHKERRQ(ierr); 836 ierr = DrawDestroy(win); CHKERRQ(ierr); 837 } 838 return 0; 839 } 840 841 842 843