1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.6 1996/02/19 03:51:17 bsmith Exp curfman $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,&ia,&ja);CHKERRQ(ierr); 41 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 42 43 PetscFree(ia); PetscFree(ja); 44 return 0; 45 } 46 47 /* 48 Adds diagonal pointers to sparse matrix structure. 49 */ 50 51 int MatMarkDiag_SeqBAIJ(Mat A) 52 { 53 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 54 int i,j, *diag, m = a->mbs; 55 56 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 57 PLogObjectMemory(A,(m+1)*sizeof(int)); 58 for ( i=0; i<m; i++ ) { 59 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 60 if (a->j[j] == i) { 61 diag[i] = j; 62 break; 63 } 64 } 65 } 66 a->diag = diag; 67 return 0; 68 } 69 70 #include "draw.h" 71 #include "pinclude/pviewer.h" 72 #include "sysio.h" 73 74 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 78 Scalar *aa; 79 80 ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr); 81 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 82 col_lens[0] = MAT_COOKIE; 83 col_lens[1] = a->m; 84 col_lens[2] = a->n; 85 col_lens[3] = a->nz*bs*bs; 86 87 /* store lengths of each row and write (including header) to file */ 88 count = 0; 89 for ( i=0; i<a->mbs; i++ ) { 90 for ( j=0; j<bs; j++ ) { 91 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 92 } 93 } 94 ierr = SYWrite(fd,col_lens,4+a->m,SYINT,1); CHKERRQ(ierr); 95 PetscFree(col_lens); 96 97 /* store column indices (zero start index) */ 98 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 99 count = 0; 100 for ( i=0; i<a->mbs; i++ ) { 101 for ( j=0; j<bs; j++ ) { 102 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 103 for ( l=0; l<bs; l++ ) { 104 jj[count++] = bs*a->j[k] + l; 105 } 106 } 107 } 108 } 109 ierr = SYWrite(fd,jj,bs*bs*a->nz,SYINT,0); CHKERRQ(ierr); 110 PetscFree(jj); 111 112 /* store nonzero values */ 113 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 118 for ( l=0; l<bs; l++ ) { 119 aa[count++] = a->a[bs*bs*k + l*bs + j]; 120 } 121 } 122 } 123 } 124 ierr = SYWrite(fd,aa,bs*bs*a->nz,SYSCALAR,0); CHKERRQ(ierr); 125 PetscFree(aa); 126 return 0; 127 } 128 129 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 132 int ierr, i,j,format,bs = a->bs,k,l; 133 FILE *fd; 134 char *outputname; 135 136 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 137 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 138 ierr = ViewerFileGetFormat_Private(viewer,&format); 139 if (format == FILE_FORMAT_INFO) { 140 /* no need to print additional information */ ; 141 } 142 else if (format == FILE_FORMAT_MATLAB) { 143 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 144 } 145 else { 146 for ( i=0; i<a->mbs; i++ ) { 147 for ( j=0; j<bs; j++ ) { 148 fprintf(fd,"row %d:",i*bs+j); 149 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 150 for ( l=0; l<bs; l++ ) { 151 #if defined(PETSC_COMPLEX) 152 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 153 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 154 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 155 } 156 else { 157 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 158 } 159 #else 160 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 161 #endif 162 } 163 } 164 fprintf(fd,"\n"); 165 } 166 } 167 } 168 fflush(fd); 169 return 0; 170 } 171 172 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 173 { 174 Mat A = (Mat) obj; 175 PetscObject vobj = (PetscObject) viewer; 176 177 if (!viewer) { 178 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 179 } 180 if (vobj->cookie == VIEWER_COOKIE) { 181 if (vobj->type == MATLAB_VIEWER) { 182 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 183 } 184 else if (vobj->type == ASCII_FILE_VIEWER||vobj->type == ASCII_FILES_VIEWER){ 185 return MatView_SeqBAIJ_ASCII(A,viewer); 186 } 187 else if (vobj->type == BINARY_FILE_VIEWER) { 188 return MatView_SeqBAIJ_Binary(A,viewer); 189 } 190 } 191 else if (vobj->cookie == DRAW_COOKIE) { 192 if (vobj->type == NULLWINDOW) return 0; 193 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 194 } 195 return 0; 196 } 197 198 199 static int MatZeroEntries_SeqBAIJ(Mat A) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 202 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 203 return 0; 204 } 205 206 int MatDestroy_SeqBAIJ(PetscObject obj) 207 { 208 Mat A = (Mat) obj; 209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 210 211 #if defined(PETSC_LOG) 212 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 213 #endif 214 PetscFree(a->a); 215 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 216 if (a->diag) PetscFree(a->diag); 217 if (a->ilen) PetscFree(a->ilen); 218 if (a->imax) PetscFree(a->imax); 219 if (a->solve_work) PetscFree(a->solve_work); 220 if (a->mult_work) PetscFree(a->mult_work); 221 PetscFree(a); 222 PLogObjectDestroy(A); 223 PetscHeaderDestroy(A); 224 return 0; 225 } 226 227 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 if (op == ROW_ORIENTED) a->roworiented = 1; 231 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 232 else if (op == COLUMNS_SORTED) a->sorted = 1; 233 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 234 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 235 else if (op == ROWS_SORTED || 236 op == SYMMETRIC_MATRIX || 237 op == STRUCTURALLY_SYMMETRIC_MATRIX || 238 op == YES_NEW_DIAGONALS) 239 PLogInfo((PetscObject)A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 240 else if (op == NO_NEW_DIAGONALS) 241 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 242 else 243 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 244 return 0; 245 } 246 247 248 /* -------------------------------------------------------*/ 249 /* Should check that shapes of vectors and matrices match */ 250 /* -------------------------------------------------------*/ 251 #include "pinclude/plapack.h" 252 253 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 256 Scalar *xg,*yg; 257 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 258 register Scalar x1,x2,x3,x4,x5; 259 int mbs = a->mbs, m = a->m, i, *idx,*ii; 260 int bs = a->bs,j,n,bs2 = bs*bs; 261 262 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 263 PetscMemzero(y,m*sizeof(Scalar)); 264 x = x; 265 idx = a->j; 266 v = a->a; 267 ii = a->i; 268 269 switch (bs) { 270 case 1: 271 for ( i=0; i<m; i++ ) { 272 n = ii[1] - ii[0]; ii++; 273 sum = 0.0; 274 while (n--) sum += *v++ * x[*idx++]; 275 y[i] = sum; 276 } 277 break; 278 case 2: 279 for ( i=0; i<mbs; i++ ) { 280 n = ii[1] - ii[0]; ii++; 281 sum1 = 0.0; sum2 = 0.0; 282 for ( j=0; j<n; j++ ) { 283 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 284 sum1 += v[0]*x1 + v[2]*x2; 285 sum2 += v[1]*x1 + v[3]*x2; 286 v += 4; 287 } 288 y[0] += sum1; y[1] += sum2; 289 y += 2; 290 } 291 break; 292 case 3: 293 for ( i=0; i<mbs; i++ ) { 294 n = ii[1] - ii[0]; ii++; 295 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 296 for ( j=0; j<n; j++ ) { 297 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 298 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 299 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 300 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 301 v += 9; 302 } 303 y[0] += sum1; y[1] += sum2; y[2] += sum3; 304 y += 3; 305 } 306 break; 307 case 4: 308 for ( i=0; i<mbs; i++ ) { 309 n = ii[1] - ii[0]; ii++; 310 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 311 for ( j=0; j<n; j++ ) { 312 xb = x + 4*(*idx++); 313 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 314 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 315 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 316 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 317 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 318 v += 16; 319 } 320 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 321 y += 4; 322 } 323 break; 324 case 5: 325 for ( i=0; i<mbs; i++ ) { 326 n = ii[1] - ii[0]; ii++; 327 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 328 for ( j=0; j<n; j++ ) { 329 xb = x + 5*(*idx++); 330 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 331 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 332 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 333 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 334 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 335 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 336 v += 25; 337 } 338 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 339 y += 5; 340 } 341 break; 342 /* block sizes larger then 5 by 5 are handled by BLAS */ 343 default: { 344 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 345 if (!a->mult_work) { 346 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 347 CHKPTRQ(a->mult_work); 348 } 349 work = a->mult_work; 350 for ( i=0; i<mbs; i++ ) { 351 n = ii[1] - ii[0]; ii++; 352 ncols = n*bs; 353 workt = work; 354 for ( j=0; j<n; j++ ) { 355 xb = x + bs*(*idx++); 356 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 357 workt += bs; 358 } 359 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 360 v += n*bs2; 361 y += bs; 362 } 363 } 364 } 365 PLogFlops(2*bs2*a->nz - m); 366 return 0; 367 } 368 369 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 372 *nz = a->bs*a->bs*a->nz; 373 *nza = a->maxnz; 374 *mem = (int)A->mem; 375 return 0; 376 } 377 378 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 379 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 380 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 381 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 382 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 383 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 384 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 385 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 386 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 387 388 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 389 { 390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 391 *m = a->m; *n = a->n; 392 return 0; 393 } 394 395 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 396 { 397 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 398 *m = 0; *n = a->m; 399 return 0; 400 } 401 402 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 403 { 404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 405 Scalar *v = a->a; 406 double sum = 0.0; 407 int i; 408 409 if (type == NORM_FROBENIUS) { 410 for (i=0; i<a->nz; i++ ) { 411 #if defined(PETSC_COMPLEX) 412 sum += real(conj(*v)*(*v)); v++; 413 #else 414 sum += (*v)*(*v); v++; 415 #endif 416 } 417 *norm = sqrt(sum); 418 } 419 else { 420 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 421 } 422 return 0; 423 } 424 425 /* 426 note: This can only work for identity for row and col. It would 427 be good to check this and otherwise generate an error. 428 */ 429 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 432 Mat outA; 433 int ierr; 434 435 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 436 437 outA = inA; 438 inA->factor = FACTOR_LU; 439 a->row = row; 440 a->col = col; 441 442 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 443 444 if (!a->diag) { 445 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 446 } 447 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 448 return 0; 449 } 450 451 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 452 { 453 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 454 int one = 1, totalnz = a->bs*a->bs*a->nz; 455 BLscal_( &totalnz, alpha, a->a, &one ); 456 PLogFlops(totalnz); 457 return 0; 458 } 459 460 int MatPrintHelp_SeqBAIJ(Mat A) 461 { 462 static int called = 0; 463 464 if (called) return 0; else called = 1; 465 return 0; 466 } 467 /* -------------------------------------------------------------------*/ 468 static struct _MatOps MatOps = {0, 469 0,0, 470 MatMult_SeqBAIJ,0, 471 0,0, 472 MatSolve_SeqBAIJ,0, 473 0,0, 474 MatLUFactor_SeqBAIJ,0, 475 0, 476 0, 477 MatGetInfo_SeqBAIJ,0, 478 0,0,MatNorm_SeqBAIJ, 479 0,0, 480 0, 481 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 482 MatGetReordering_SeqBAIJ, 483 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 484 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 485 MatILUFactorSymbolic_SeqBAIJ,0, 486 0,0,/*MatConvert_SeqBAIJ*/ 0, 487 0,0, 488 MatConvertSameType_SeqBAIJ,0,0, 489 MatILUFactor_SeqBAIJ,0,0, 490 0,0, 491 0,0, 492 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 493 0}; 494 495 /*@C 496 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 497 (the default parallel PETSc format). For good matrix assembly performance 498 the user should preallocate the matrix storage by setting the parameter nz 499 (or nzz). By setting these parameters accurately, performance can be 500 increased by more than a factor of 50. 501 502 Input Parameters: 503 . comm - MPI communicator, set to MPI_COMM_SELF 504 . bs - size of block 505 . m - number of rows 506 . n - number of columns 507 . nz - number of block nonzeros per block row (same for all rows) 508 . nzz - number of block nonzeros per block row or PETSC_NULL 509 (possibly different for each row) 510 511 Output Parameter: 512 . A - the matrix 513 514 Notes: 515 The BAIJ format, is fully compatible with standard Fortran 77 516 storage. That is, the stored row and column indices can begin at 517 either one (as in Fortran) or zero. See the users manual for details. 518 519 Specify the preallocated storage with either nz or nnz (not both). 520 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 521 allocation. For additional details, see the users manual chapter on 522 matrices and the file $(PETSC_DIR)/Performance. 523 524 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 525 @*/ 526 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 527 { 528 Mat B; 529 Mat_SeqBAIJ *b; 530 int i,len,ierr, flg,mbs = m/bs; 531 532 if (mbs*bs != m) 533 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 534 535 *A = 0; 536 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 537 PLogObjectCreate(B); 538 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 539 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 540 switch (bs) { 541 case 1: 542 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 543 break; 544 case 2: 545 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 546 break; 547 } 548 B->destroy = MatDestroy_SeqBAIJ; 549 B->view = MatView_SeqBAIJ; 550 B->factor = 0; 551 B->lupivotthreshold = 1.0; 552 b->row = 0; 553 b->col = 0; 554 b->reallocs = 0; 555 556 b->m = m; 557 b->mbs = mbs; 558 b->n = n; 559 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 560 if (nnz == PETSC_NULL) { 561 if (nz == PETSC_DEFAULT) nz = 5; 562 else if (nz <= 0) nz = 1; 563 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 564 nz = nz*mbs; 565 } 566 else { 567 nz = 0; 568 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 569 } 570 571 /* allocate the matrix space */ 572 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 573 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 574 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 575 b->j = (int *) (b->a + nz*bs*bs); 576 PetscMemzero(b->j,nz*sizeof(int)); 577 b->i = b->j + nz; 578 b->singlemalloc = 1; 579 580 b->i[0] = 0; 581 for (i=1; i<mbs+1; i++) { 582 b->i[i] = b->i[i-1] + b->imax[i-1]; 583 } 584 585 /* b->ilen will count nonzeros in each block row so far. */ 586 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 587 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 588 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 589 590 b->bs = bs; 591 b->mbs = mbs; 592 b->nz = 0; 593 b->maxnz = nz; 594 b->sorted = 0; 595 b->roworiented = 1; 596 b->nonew = 0; 597 b->diag = 0; 598 b->solve_work = 0; 599 b->mult_work = 0; 600 b->spptr = 0; 601 602 *A = B; 603 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 604 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 605 return 0; 606 } 607 608 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 609 { 610 Mat C; 611 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 612 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 613 614 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 615 616 *B = 0; 617 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 618 PLogObjectCreate(C); 619 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 620 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 621 C->destroy = MatDestroy_SeqBAIJ; 622 C->view = MatView_SeqBAIJ; 623 C->factor = A->factor; 624 c->row = 0; 625 c->col = 0; 626 C->assembled = PETSC_TRUE; 627 628 c->m = a->m; 629 c->n = a->n; 630 c->bs = a->bs; 631 c->mbs = a->mbs; 632 c->nbs = a->nbs; 633 634 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 635 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 636 for ( i=0; i<mbs; i++ ) { 637 c->imax[i] = a->imax[i]; 638 c->ilen[i] = a->ilen[i]; 639 } 640 641 /* allocate the matrix space */ 642 c->singlemalloc = 1; 643 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 644 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 645 c->j = (int *) (c->a + nz*bs*bs); 646 c->i = c->j + nz; 647 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 648 if (mbs > 0) { 649 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 650 if (cpvalues == COPY_VALUES) { 651 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 652 } 653 } 654 655 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 656 c->sorted = a->sorted; 657 c->roworiented = a->roworiented; 658 c->nonew = a->nonew; 659 660 if (a->diag) { 661 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 662 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 663 for ( i=0; i<mbs; i++ ) { 664 c->diag[i] = a->diag[i]; 665 } 666 } 667 else c->diag = 0; 668 c->nz = a->nz; 669 c->maxnz = a->maxnz; 670 c->solve_work = 0; 671 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 672 c->mult_work = 0; 673 *B = C; 674 return 0; 675 } 676 677 int MatLoad_SeqBAIJ(Viewer bview,MatType type,Mat *A) 678 { 679 Mat_SeqBAIJ *a; 680 Mat B; 681 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 682 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 683 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 684 int *masked, nmask,tmp,ishift,bs2; 685 Scalar *aa; 686 PetscObject vobj = (PetscObject) bview; 687 MPI_Comm comm = vobj->comm; 688 689 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 690 bs2 = bs*bs; 691 692 MPI_Comm_size(comm,&size); 693 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 694 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 695 ierr = SYRead(fd,header,4,SYINT); CHKERRQ(ierr); 696 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 697 M = header[1]; N = header[2]; nz = header[3]; 698 699 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 700 701 /* 702 This code adds extra rows to make sure the number of rows is 703 divisible by the blocksize 704 */ 705 mbs = M/bs; 706 extra_rows = bs - M + bs*(mbs); 707 if (extra_rows == bs) extra_rows = 0; 708 else mbs++; 709 if (extra_rows) { 710 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 711 } 712 713 /* read in row lengths */ 714 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 715 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 716 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 717 718 /* read in column indices */ 719 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 720 ierr = SYRead(fd,jj,nz,SYINT); CHKERRQ(ierr); 721 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 722 723 /* loop over row lengths determining block row lengths */ 724 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 725 PetscMemzero(browlengths,mbs*sizeof(int)); 726 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 727 PetscMemzero(mask,mbs*sizeof(int)); 728 masked = mask + mbs; 729 rowcount = 0; nzcount = 0; 730 for ( i=0; i<mbs; i++ ) { 731 nmask = 0; 732 for ( j=0; j<bs; j++ ) { 733 kmax = rowlengths[rowcount]; 734 for ( k=0; k<kmax; k++ ) { 735 tmp = jj[nzcount++]/bs; 736 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 737 } 738 rowcount++; 739 } 740 browlengths[i] += nmask; 741 /* zero out the mask elements we set */ 742 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 743 } 744 745 /* create our matrix */ 746 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 747 CHKERRQ(ierr); 748 B = *A; 749 a = (Mat_SeqBAIJ *) B->data; 750 751 /* set matrix "i" values */ 752 a->i[0] = 0; 753 for ( i=1; i<= mbs; i++ ) { 754 a->i[i] = a->i[i-1] + browlengths[i-1]; 755 a->ilen[i-1] = browlengths[i-1]; 756 } 757 a->nz = 0; 758 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 759 760 /* read in nonzero values */ 761 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 762 ierr = SYRead(fd,aa,nz,SYSCALAR); CHKERRQ(ierr); 763 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 764 765 /* set "a" and "j" values into matrix */ 766 nzcount = 0; jcount = 0; 767 for ( i=0; i<mbs; i++ ) { 768 nzcountb = nzcount; 769 nmask = 0; 770 for ( j=0; j<bs; j++ ) { 771 kmax = rowlengths[i*bs+j]; 772 for ( k=0; k<kmax; k++ ) { 773 tmp = jj[nzcount++]/bs; 774 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 775 } 776 rowcount++; 777 } 778 /* sort the masked values */ 779 SYIsort(nmask,masked); 780 781 /* set "j" values into matrix */ 782 maskcount = 1; 783 for ( j=0; j<nmask; j++ ) { 784 a->j[jcount++] = masked[j]; 785 mask[masked[j]] = maskcount++; 786 } 787 /* set "a" values into matrix */ 788 ishift = bs2*a->i[i]; 789 for ( j=0; j<bs; j++ ) { 790 kmax = rowlengths[i*bs+j]; 791 for ( k=0; k<kmax; k++ ) { 792 tmp = jj[nzcountb]/bs ; 793 block = mask[tmp] - 1; 794 point = jj[nzcountb] - bs*tmp; 795 idx = ishift + bs2*block + j + bs*point; 796 a->a[idx] = aa[nzcountb++]; 797 } 798 } 799 /* zero out the mask elements we set */ 800 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 801 } 802 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 803 804 PetscFree(rowlengths); 805 PetscFree(browlengths); 806 PetscFree(aa); 807 PetscFree(jj); 808 PetscFree(mask); 809 810 B->assembled = PETSC_TRUE; 811 812 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 813 if (flg) { 814 Viewer viewer; 815 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 816 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO,0);CHKERRQ(ierr); 817 ierr = MatView(B,viewer); CHKERRQ(ierr); 818 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 819 } 820 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 821 if (flg) { 822 Viewer viewer; 823 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 824 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 825 ierr = MatView(B,viewer); CHKERRQ(ierr); 826 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 827 } 828 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 829 if (flg) { 830 Viewer viewer; 831 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 832 ierr = MatView(B,viewer); CHKERRQ(ierr); 833 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 834 } 835 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 836 if (flg) { 837 Viewer viewer; 838 ierr = ViewerFileOpenASCII(B->comm,"stdout",&viewer);CHKERRQ(ierr); 839 ierr = ViewerFileSetFormat(viewer,FILE_FORMAT_MATLAB,"M");CHKERRQ(ierr); 840 ierr = MatView(B,viewer); CHKERRQ(ierr); 841 ierr = ViewerDestroy(viewer); CHKERRQ(ierr); 842 } 843 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 844 if (flg) { 845 Draw win; 846 ierr = DrawOpenX(B->comm,0,0,0,0,300,300,&win); CHKERRQ(ierr); 847 ierr = MatView(B,(Viewer)win); CHKERRQ(ierr); 848 ierr = DrawSyncFlush(win); CHKERRQ(ierr); 849 ierr = DrawDestroy(win); CHKERRQ(ierr); 850 } 851 return 0; 852 } 853 854 855 856