1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.17 1996/03/25 22:43:35 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs*bs; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs*bs*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 225 static int MatZeroEntries_SeqBAIJ(Mat A) 226 { 227 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 228 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 229 return 0; 230 } 231 232 int MatDestroy_SeqBAIJ(PetscObject obj) 233 { 234 Mat A = (Mat) obj; 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 236 237 #if defined(PETSC_LOG) 238 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 239 #endif 240 PetscFree(a->a); 241 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 242 if (a->diag) PetscFree(a->diag); 243 if (a->ilen) PetscFree(a->ilen); 244 if (a->imax) PetscFree(a->imax); 245 if (a->solve_work) PetscFree(a->solve_work); 246 if (a->mult_work) PetscFree(a->mult_work); 247 PetscFree(a); 248 PLogObjectDestroy(A); 249 PetscHeaderDestroy(A); 250 return 0; 251 } 252 253 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 254 { 255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 256 if (op == ROW_ORIENTED) a->roworiented = 1; 257 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 258 else if (op == COLUMNS_SORTED) a->sorted = 1; 259 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 260 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 261 else if (op == ROWS_SORTED || 262 op == SYMMETRIC_MATRIX || 263 op == STRUCTURALLY_SYMMETRIC_MATRIX || 264 op == YES_NEW_DIAGONALS) 265 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 266 else if (op == NO_NEW_DIAGONALS) 267 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 268 else 269 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 270 return 0; 271 } 272 273 274 /* -------------------------------------------------------*/ 275 /* Should check that shapes of vectors and matrices match */ 276 /* -------------------------------------------------------*/ 277 #include "pinclude/plapack.h" 278 279 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 280 { 281 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 282 Scalar *xg,*yg; 283 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 284 register Scalar x1,x2,x3,x4,x5; 285 int mbs = a->mbs, m = a->m, i, *idx,*ii; 286 int bs = a->bs,j,n,bs2 = bs*bs; 287 288 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 289 PetscMemzero(y,m*sizeof(Scalar)); 290 x = x; 291 idx = a->j; 292 v = a->a; 293 ii = a->i; 294 295 switch (bs) { 296 case 1: 297 for ( i=0; i<m; i++ ) { 298 n = ii[1] - ii[0]; ii++; 299 sum = 0.0; 300 while (n--) sum += *v++ * x[*idx++]; 301 y[i] = sum; 302 } 303 break; 304 case 2: 305 for ( i=0; i<mbs; i++ ) { 306 n = ii[1] - ii[0]; ii++; 307 sum1 = 0.0; sum2 = 0.0; 308 for ( j=0; j<n; j++ ) { 309 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 310 sum1 += v[0]*x1 + v[2]*x2; 311 sum2 += v[1]*x1 + v[3]*x2; 312 v += 4; 313 } 314 y[0] += sum1; y[1] += sum2; 315 y += 2; 316 } 317 break; 318 case 3: 319 for ( i=0; i<mbs; i++ ) { 320 n = ii[1] - ii[0]; ii++; 321 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 322 for ( j=0; j<n; j++ ) { 323 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 324 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 325 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 326 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 327 v += 9; 328 } 329 y[0] += sum1; y[1] += sum2; y[2] += sum3; 330 y += 3; 331 } 332 break; 333 case 4: 334 for ( i=0; i<mbs; i++ ) { 335 n = ii[1] - ii[0]; ii++; 336 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 337 for ( j=0; j<n; j++ ) { 338 xb = x + 4*(*idx++); 339 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 340 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 341 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 342 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 343 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 344 v += 16; 345 } 346 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 347 y += 4; 348 } 349 break; 350 case 5: 351 for ( i=0; i<mbs; i++ ) { 352 n = ii[1] - ii[0]; ii++; 353 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 354 for ( j=0; j<n; j++ ) { 355 xb = x + 5*(*idx++); 356 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 357 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 358 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 359 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 360 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 361 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 362 v += 25; 363 } 364 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 365 y += 5; 366 } 367 break; 368 /* block sizes larger then 5 by 5 are handled by BLAS */ 369 default: { 370 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 371 if (!a->mult_work) { 372 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 373 CHKPTRQ(a->mult_work); 374 } 375 work = a->mult_work; 376 for ( i=0; i<mbs; i++ ) { 377 n = ii[1] - ii[0]; ii++; 378 ncols = n*bs; 379 workt = work; 380 for ( j=0; j<n; j++ ) { 381 xb = x + bs*(*idx++); 382 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 383 workt += bs; 384 } 385 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 386 v += n*bs2; 387 y += bs; 388 } 389 } 390 } 391 PLogFlops(2*bs2*a->nz - m); 392 return 0; 393 } 394 395 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 396 { 397 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 398 if (nz) *nz = a->bs*a->bs*a->nz; 399 if (nza) *nza = a->maxnz; 400 if (mem) *mem = (int)A->mem; 401 return 0; 402 } 403 404 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 405 { 406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 407 408 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 409 410 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 411 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 412 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 413 *flg = PETSC_FALSE; return 0; 414 } 415 416 /* if the a->i are the same */ 417 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 418 *flg = PETSC_FALSE; return 0; 419 } 420 421 /* if a->j are the same */ 422 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 423 *flg = PETSC_FALSE; return 0; 424 } 425 426 /* if a->a are the same */ 427 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 428 *flg = PETSC_FALSE; return 0; 429 } 430 *flg = PETSC_TRUE; 431 return 0; 432 433 } 434 435 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 438 int i,j,k,n,row,bs,*ai,*aj,ambs; 439 Scalar *x, zero = 0.0,*aa,*aa_j; 440 441 bs = a->bs; 442 aa = a->a; 443 ai = a->i; 444 aj = a->j; 445 ambs = a->mbs; 446 447 VecSet(&zero,v); 448 VecGetArray(v,&x); VecGetLocalSize(v,&n); 449 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqAIJ:Nonconforming matrix and vector"); 450 for ( i=0; i<ambs; i++ ) { 451 for ( j=ai[i]; j<ai[i+1]; j++ ) { 452 if (aj[j] == i) { 453 row = i*bs; 454 aa_j = aa+j*bs*bs; 455 for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k]; 456 break; 457 } 458 } 459 } 460 return 0; 461 } 462 463 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 464 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 465 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 466 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 467 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 468 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 469 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 470 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 471 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 472 473 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 474 { 475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 476 *m = a->m; *n = a->n; 477 return 0; 478 } 479 480 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 481 { 482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 483 *m = 0; *n = a->m; 484 return 0; 485 } 486 487 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 488 { 489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 490 Scalar *v = a->a; 491 double sum = 0.0; 492 int i; 493 494 if (type == NORM_FROBENIUS) { 495 for (i=0; i<a->nz; i++ ) { 496 #if defined(PETSC_COMPLEX) 497 sum += real(conj(*v)*(*v)); v++; 498 #else 499 sum += (*v)*(*v); v++; 500 #endif 501 } 502 *norm = sqrt(sum); 503 } 504 else { 505 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 506 } 507 return 0; 508 } 509 510 /* 511 note: This can only work for identity for row and col. It would 512 be good to check this and otherwise generate an error. 513 */ 514 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 515 { 516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 517 Mat outA; 518 int ierr; 519 520 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 521 522 outA = inA; 523 inA->factor = FACTOR_LU; 524 a->row = row; 525 a->col = col; 526 527 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 528 529 if (!a->diag) { 530 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 531 } 532 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 533 return 0; 534 } 535 536 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 537 { 538 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 539 int one = 1, totalnz = a->bs*a->bs*a->nz; 540 BLscal_( &totalnz, alpha, a->a, &one ); 541 PLogFlops(totalnz); 542 return 0; 543 } 544 545 int MatPrintHelp_SeqBAIJ(Mat A) 546 { 547 static int called = 0; 548 549 if (called) return 0; else called = 1; 550 return 0; 551 } 552 /* -------------------------------------------------------------------*/ 553 static struct _MatOps MatOps = {0, 554 0,0, 555 MatMult_SeqBAIJ,0, 556 0,0, 557 MatSolve_SeqBAIJ,0, 558 0,0, 559 MatLUFactor_SeqBAIJ,0, 560 0, 561 0, 562 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 563 MatGetDiagonal_SeqBAIJ,0,MatNorm_SeqBAIJ, 564 0,0, 565 0, 566 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 567 MatGetReordering_SeqBAIJ, 568 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 569 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 570 MatILUFactorSymbolic_SeqBAIJ,0, 571 0,0,/* MatConvert_SeqBAIJ */ 0, 572 0,0, 573 MatConvertSameType_SeqBAIJ,0,0, 574 MatILUFactor_SeqBAIJ,0,0, 575 0,0, 576 0,0, 577 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 578 0}; 579 580 /*@C 581 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 582 (the default parallel PETSc format). For good matrix assembly performance 583 the user should preallocate the matrix storage by setting the parameter nz 584 (or nzz). By setting these parameters accurately, performance can be 585 increased by more than a factor of 50. 586 587 Input Parameters: 588 . comm - MPI communicator, set to MPI_COMM_SELF 589 . bs - size of block 590 . m - number of rows 591 . n - number of columns 592 . nz - number of block nonzeros per block row (same for all rows) 593 . nzz - number of block nonzeros per block row or PETSC_NULL 594 (possibly different for each row) 595 596 Output Parameter: 597 . A - the matrix 598 599 Notes: 600 The BAIJ format, is fully compatible with standard Fortran 77 601 storage. That is, the stored row and column indices can begin at 602 either one (as in Fortran) or zero. See the users manual for details. 603 604 Specify the preallocated storage with either nz or nnz (not both). 605 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 606 allocation. For additional details, see the users manual chapter on 607 matrices and the file $(PETSC_DIR)/Performance. 608 609 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 610 @*/ 611 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 612 { 613 Mat B; 614 Mat_SeqBAIJ *b; 615 int i,len,ierr, flg,mbs = m/bs; 616 617 if (mbs*bs != m) 618 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 619 620 *A = 0; 621 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 622 PLogObjectCreate(B); 623 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 624 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 625 switch (bs) { 626 case 1: 627 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 628 break; 629 case 2: 630 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 631 break; 632 case 3: 633 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 634 break; 635 case 4: 636 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 637 break; 638 case 5: 639 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 640 break; 641 } 642 B->destroy = MatDestroy_SeqBAIJ; 643 B->view = MatView_SeqBAIJ; 644 B->factor = 0; 645 B->lupivotthreshold = 1.0; 646 b->row = 0; 647 b->col = 0; 648 b->reallocs = 0; 649 650 b->m = m; 651 b->mbs = mbs; 652 b->n = n; 653 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 654 if (nnz == PETSC_NULL) { 655 if (nz == PETSC_DEFAULT) nz = 5; 656 else if (nz <= 0) nz = 1; 657 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 658 nz = nz*mbs; 659 } 660 else { 661 nz = 0; 662 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 663 } 664 665 /* allocate the matrix space */ 666 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 667 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 668 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 669 b->j = (int *) (b->a + nz*bs*bs); 670 PetscMemzero(b->j,nz*sizeof(int)); 671 b->i = b->j + nz; 672 b->singlemalloc = 1; 673 674 b->i[0] = 0; 675 for (i=1; i<mbs+1; i++) { 676 b->i[i] = b->i[i-1] + b->imax[i-1]; 677 } 678 679 /* b->ilen will count nonzeros in each block row so far. */ 680 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 681 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 682 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 683 684 b->bs = bs; 685 b->mbs = mbs; 686 b->nz = 0; 687 b->maxnz = nz; 688 b->sorted = 0; 689 b->roworiented = 1; 690 b->nonew = 0; 691 b->diag = 0; 692 b->solve_work = 0; 693 b->mult_work = 0; 694 b->spptr = 0; 695 696 *A = B; 697 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 698 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 699 return 0; 700 } 701 702 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 703 { 704 Mat C; 705 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 706 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 707 708 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 709 710 *B = 0; 711 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 712 PLogObjectCreate(C); 713 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 714 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 715 C->destroy = MatDestroy_SeqBAIJ; 716 C->view = MatView_SeqBAIJ; 717 C->factor = A->factor; 718 c->row = 0; 719 c->col = 0; 720 C->assembled = PETSC_TRUE; 721 722 c->m = a->m; 723 c->n = a->n; 724 c->bs = a->bs; 725 c->mbs = a->mbs; 726 c->nbs = a->nbs; 727 728 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 729 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 730 for ( i=0; i<mbs; i++ ) { 731 c->imax[i] = a->imax[i]; 732 c->ilen[i] = a->ilen[i]; 733 } 734 735 /* allocate the matrix space */ 736 c->singlemalloc = 1; 737 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 738 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 739 c->j = (int *) (c->a + nz*bs*bs); 740 c->i = c->j + nz; 741 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 742 if (mbs > 0) { 743 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 744 if (cpvalues == COPY_VALUES) { 745 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 746 } 747 } 748 749 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 750 c->sorted = a->sorted; 751 c->roworiented = a->roworiented; 752 c->nonew = a->nonew; 753 754 if (a->diag) { 755 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 756 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 757 for ( i=0; i<mbs; i++ ) { 758 c->diag[i] = a->diag[i]; 759 } 760 } 761 else c->diag = 0; 762 c->nz = a->nz; 763 c->maxnz = a->maxnz; 764 c->solve_work = 0; 765 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 766 c->mult_work = 0; 767 *B = C; 768 return 0; 769 } 770 771 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 772 { 773 Mat_SeqBAIJ *a; 774 Mat B; 775 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 776 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 777 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 778 int *masked, nmask,tmp,ishift,bs2; 779 Scalar *aa; 780 MPI_Comm comm = ((PetscObject) viewer)->comm; 781 782 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 783 bs2 = bs*bs; 784 785 MPI_Comm_size(comm,&size); 786 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 787 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 788 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 789 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 790 M = header[1]; N = header[2]; nz = header[3]; 791 792 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 793 794 /* 795 This code adds extra rows to make sure the number of rows is 796 divisible by the blocksize 797 */ 798 mbs = M/bs; 799 extra_rows = bs - M + bs*(mbs); 800 if (extra_rows == bs) extra_rows = 0; 801 else mbs++; 802 if (extra_rows) { 803 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 804 } 805 806 /* read in row lengths */ 807 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 808 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 809 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 810 811 /* read in column indices */ 812 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 813 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 814 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 815 816 /* loop over row lengths determining block row lengths */ 817 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 818 PetscMemzero(browlengths,mbs*sizeof(int)); 819 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 820 PetscMemzero(mask,mbs*sizeof(int)); 821 masked = mask + mbs; 822 rowcount = 0; nzcount = 0; 823 for ( i=0; i<mbs; i++ ) { 824 nmask = 0; 825 for ( j=0; j<bs; j++ ) { 826 kmax = rowlengths[rowcount]; 827 for ( k=0; k<kmax; k++ ) { 828 tmp = jj[nzcount++]/bs; 829 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 830 } 831 rowcount++; 832 } 833 browlengths[i] += nmask; 834 /* zero out the mask elements we set */ 835 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 836 } 837 838 /* create our matrix */ 839 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 840 CHKERRQ(ierr); 841 B = *A; 842 a = (Mat_SeqBAIJ *) B->data; 843 844 /* set matrix "i" values */ 845 a->i[0] = 0; 846 for ( i=1; i<= mbs; i++ ) { 847 a->i[i] = a->i[i-1] + browlengths[i-1]; 848 a->ilen[i-1] = browlengths[i-1]; 849 } 850 a->nz = 0; 851 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 852 853 /* read in nonzero values */ 854 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 855 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 856 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 857 858 /* set "a" and "j" values into matrix */ 859 nzcount = 0; jcount = 0; 860 for ( i=0; i<mbs; i++ ) { 861 nzcountb = nzcount; 862 nmask = 0; 863 for ( j=0; j<bs; j++ ) { 864 kmax = rowlengths[i*bs+j]; 865 for ( k=0; k<kmax; k++ ) { 866 tmp = jj[nzcount++]/bs; 867 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 868 } 869 rowcount++; 870 } 871 /* sort the masked values */ 872 PetscSortInt(nmask,masked); 873 874 /* set "j" values into matrix */ 875 maskcount = 1; 876 for ( j=0; j<nmask; j++ ) { 877 a->j[jcount++] = masked[j]; 878 mask[masked[j]] = maskcount++; 879 } 880 /* set "a" values into matrix */ 881 ishift = bs2*a->i[i]; 882 for ( j=0; j<bs; j++ ) { 883 kmax = rowlengths[i*bs+j]; 884 for ( k=0; k<kmax; k++ ) { 885 tmp = jj[nzcountb]/bs ; 886 block = mask[tmp] - 1; 887 point = jj[nzcountb] - bs*tmp; 888 idx = ishift + bs2*block + j + bs*point; 889 a->a[idx] = aa[nzcountb++]; 890 } 891 } 892 /* zero out the mask elements we set */ 893 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 894 } 895 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 896 897 PetscFree(rowlengths); 898 PetscFree(browlengths); 899 PetscFree(aa); 900 PetscFree(jj); 901 PetscFree(mask); 902 903 B->assembled = PETSC_TRUE; 904 905 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 906 if (flg) { 907 Viewer tviewer; 908 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 909 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 910 ierr = MatView(B,tviewer); CHKERRQ(ierr); 911 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 912 } 913 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 914 if (flg) { 915 Viewer tviewer; 916 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 917 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 918 ierr = MatView(B,tviewer); CHKERRQ(ierr); 919 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 920 } 921 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 922 if (flg) { 923 Viewer tviewer; 924 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 925 ierr = MatView(B,tviewer); CHKERRQ(ierr); 926 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 927 } 928 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 929 if (flg) { 930 Viewer tviewer; 931 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 932 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 933 ierr = MatView(B,tviewer); CHKERRQ(ierr); 934 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 935 } 936 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 937 if (flg) { 938 Viewer tviewer; 939 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 940 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 941 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 942 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 943 } 944 return 0; 945 } 946 947 948 949