1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.18 1996/03/25 23:39:02 balay Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs*bs; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs*bs*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 225 static int MatZeroEntries_SeqBAIJ(Mat A) 226 { 227 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 228 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 229 return 0; 230 } 231 232 int MatDestroy_SeqBAIJ(PetscObject obj) 233 { 234 Mat A = (Mat) obj; 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 236 237 #if defined(PETSC_LOG) 238 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 239 #endif 240 PetscFree(a->a); 241 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 242 if (a->diag) PetscFree(a->diag); 243 if (a->ilen) PetscFree(a->ilen); 244 if (a->imax) PetscFree(a->imax); 245 if (a->solve_work) PetscFree(a->solve_work); 246 if (a->mult_work) PetscFree(a->mult_work); 247 PetscFree(a); 248 return 0; 249 } 250 251 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 252 { 253 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 254 if (op == ROW_ORIENTED) a->roworiented = 1; 255 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 256 else if (op == COLUMNS_SORTED) a->sorted = 1; 257 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 258 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 259 else if (op == ROWS_SORTED || 260 op == SYMMETRIC_MATRIX || 261 op == STRUCTURALLY_SYMMETRIC_MATRIX || 262 op == YES_NEW_DIAGONALS) 263 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 264 else if (op == NO_NEW_DIAGONALS) 265 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 266 else 267 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 268 return 0; 269 } 270 271 272 /* -------------------------------------------------------*/ 273 /* Should check that shapes of vectors and matrices match */ 274 /* -------------------------------------------------------*/ 275 #include "pinclude/plapack.h" 276 277 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 278 { 279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 280 Scalar *xg,*yg; 281 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 282 register Scalar x1,x2,x3,x4,x5; 283 int mbs = a->mbs, m = a->m, i, *idx,*ii; 284 int bs = a->bs,j,n,bs2 = bs*bs; 285 286 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 287 PetscMemzero(y,m*sizeof(Scalar)); 288 x = x; 289 idx = a->j; 290 v = a->a; 291 ii = a->i; 292 293 switch (bs) { 294 case 1: 295 for ( i=0; i<m; i++ ) { 296 n = ii[1] - ii[0]; ii++; 297 sum = 0.0; 298 while (n--) sum += *v++ * x[*idx++]; 299 y[i] = sum; 300 } 301 break; 302 case 2: 303 for ( i=0; i<mbs; i++ ) { 304 n = ii[1] - ii[0]; ii++; 305 sum1 = 0.0; sum2 = 0.0; 306 for ( j=0; j<n; j++ ) { 307 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 308 sum1 += v[0]*x1 + v[2]*x2; 309 sum2 += v[1]*x1 + v[3]*x2; 310 v += 4; 311 } 312 y[0] += sum1; y[1] += sum2; 313 y += 2; 314 } 315 break; 316 case 3: 317 for ( i=0; i<mbs; i++ ) { 318 n = ii[1] - ii[0]; ii++; 319 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 320 for ( j=0; j<n; j++ ) { 321 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 322 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 323 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 324 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 325 v += 9; 326 } 327 y[0] += sum1; y[1] += sum2; y[2] += sum3; 328 y += 3; 329 } 330 break; 331 case 4: 332 for ( i=0; i<mbs; i++ ) { 333 n = ii[1] - ii[0]; ii++; 334 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 335 for ( j=0; j<n; j++ ) { 336 xb = x + 4*(*idx++); 337 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 338 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 339 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 340 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 341 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 342 v += 16; 343 } 344 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 345 y += 4; 346 } 347 break; 348 case 5: 349 for ( i=0; i<mbs; i++ ) { 350 n = ii[1] - ii[0]; ii++; 351 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 352 for ( j=0; j<n; j++ ) { 353 xb = x + 5*(*idx++); 354 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 355 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 356 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 357 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 358 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 359 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 360 v += 25; 361 } 362 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 363 y += 5; 364 } 365 break; 366 /* block sizes larger then 5 by 5 are handled by BLAS */ 367 default: { 368 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 369 if (!a->mult_work) { 370 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 371 CHKPTRQ(a->mult_work); 372 } 373 work = a->mult_work; 374 for ( i=0; i<mbs; i++ ) { 375 n = ii[1] - ii[0]; ii++; 376 ncols = n*bs; 377 workt = work; 378 for ( j=0; j<n; j++ ) { 379 xb = x + bs*(*idx++); 380 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 381 workt += bs; 382 } 383 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 384 v += n*bs2; 385 y += bs; 386 } 387 } 388 } 389 PLogFlops(2*bs2*a->nz - m); 390 return 0; 391 } 392 393 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 394 { 395 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 396 if (nz) *nz = a->bs*a->bs*a->nz; 397 if (nza) *nza = a->maxnz; 398 if (mem) *mem = (int)A->mem; 399 return 0; 400 } 401 402 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 403 { 404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 405 406 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 407 408 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 409 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 410 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 411 *flg = PETSC_FALSE; return 0; 412 } 413 414 /* if the a->i are the same */ 415 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 416 *flg = PETSC_FALSE; return 0; 417 } 418 419 /* if a->j are the same */ 420 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 421 *flg = PETSC_FALSE; return 0; 422 } 423 424 /* if a->a are the same */ 425 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 426 *flg = PETSC_FALSE; return 0; 427 } 428 *flg = PETSC_TRUE; 429 return 0; 430 431 } 432 433 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 434 { 435 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 436 int i,j,k,n,row,bs,*ai,*aj,ambs; 437 Scalar *x, zero = 0.0,*aa,*aa_j; 438 439 bs = a->bs; 440 aa = a->a; 441 ai = a->i; 442 aj = a->j; 443 ambs = a->mbs; 444 445 VecSet(&zero,v); 446 VecGetArray(v,&x); VecGetLocalSize(v,&n); 447 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqAIJ:Nonconforming matrix and vector"); 448 for ( i=0; i<ambs; i++ ) { 449 for ( j=ai[i]; j<ai[i+1]; j++ ) { 450 if (aj[j] == i) { 451 row = i*bs; 452 aa_j = aa+j*bs*bs; 453 for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k]; 454 break; 455 } 456 } 457 } 458 return 0; 459 } 460 461 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 462 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 463 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 464 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 465 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 466 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 467 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 468 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 469 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 470 471 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 472 { 473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 474 *m = a->m; *n = a->n; 475 return 0; 476 } 477 478 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 479 { 480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 481 *m = 0; *n = a->m; 482 return 0; 483 } 484 485 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 488 Scalar *v = a->a; 489 double sum = 0.0; 490 int i; 491 492 if (type == NORM_FROBENIUS) { 493 for (i=0; i<a->nz; i++ ) { 494 #if defined(PETSC_COMPLEX) 495 sum += real(conj(*v)*(*v)); v++; 496 #else 497 sum += (*v)*(*v); v++; 498 #endif 499 } 500 *norm = sqrt(sum); 501 } 502 else { 503 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 504 } 505 return 0; 506 } 507 508 /* 509 note: This can only work for identity for row and col. It would 510 be good to check this and otherwise generate an error. 511 */ 512 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 513 { 514 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 515 Mat outA; 516 int ierr; 517 518 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 519 520 outA = inA; 521 inA->factor = FACTOR_LU; 522 a->row = row; 523 a->col = col; 524 525 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 526 527 if (!a->diag) { 528 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 529 } 530 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 531 return 0; 532 } 533 534 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 535 { 536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 537 int one = 1, totalnz = a->bs*a->bs*a->nz; 538 BLscal_( &totalnz, alpha, a->a, &one ); 539 PLogFlops(totalnz); 540 return 0; 541 } 542 543 int MatPrintHelp_SeqBAIJ(Mat A) 544 { 545 static int called = 0; 546 547 if (called) return 0; else called = 1; 548 return 0; 549 } 550 /* -------------------------------------------------------------------*/ 551 static struct _MatOps MatOps = {0, 552 0,0, 553 MatMult_SeqBAIJ,0, 554 0,0, 555 MatSolve_SeqBAIJ,0, 556 0,0, 557 MatLUFactor_SeqBAIJ,0, 558 0, 559 0, 560 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 561 MatGetDiagonal_SeqBAIJ,0,MatNorm_SeqBAIJ, 562 0,0, 563 0, 564 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 565 MatGetReordering_SeqBAIJ, 566 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 567 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 568 MatILUFactorSymbolic_SeqBAIJ,0, 569 0,0,/* MatConvert_SeqBAIJ */ 0, 570 0,0, 571 MatConvertSameType_SeqBAIJ,0,0, 572 MatILUFactor_SeqBAIJ,0,0, 573 0,0, 574 0,0, 575 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 576 0}; 577 578 /*@C 579 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 580 (the default parallel PETSc format). For good matrix assembly performance 581 the user should preallocate the matrix storage by setting the parameter nz 582 (or nzz). By setting these parameters accurately, performance can be 583 increased by more than a factor of 50. 584 585 Input Parameters: 586 . comm - MPI communicator, set to MPI_COMM_SELF 587 . bs - size of block 588 . m - number of rows 589 . n - number of columns 590 . nz - number of block nonzeros per block row (same for all rows) 591 . nzz - number of block nonzeros per block row or PETSC_NULL 592 (possibly different for each row) 593 594 Output Parameter: 595 . A - the matrix 596 597 Notes: 598 The BAIJ format, is fully compatible with standard Fortran 77 599 storage. That is, the stored row and column indices can begin at 600 either one (as in Fortran) or zero. See the users manual for details. 601 602 Specify the preallocated storage with either nz or nnz (not both). 603 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 604 allocation. For additional details, see the users manual chapter on 605 matrices and the file $(PETSC_DIR)/Performance. 606 607 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 608 @*/ 609 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 610 { 611 Mat B; 612 Mat_SeqBAIJ *b; 613 int i,len,ierr, flg,mbs = m/bs; 614 615 if (mbs*bs != m) 616 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 617 618 *A = 0; 619 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 620 PLogObjectCreate(B); 621 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 622 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 623 switch (bs) { 624 case 1: 625 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 626 break; 627 case 2: 628 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 629 break; 630 case 3: 631 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 632 break; 633 case 4: 634 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 635 break; 636 case 5: 637 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 638 break; 639 } 640 B->destroy = MatDestroy_SeqBAIJ; 641 B->view = MatView_SeqBAIJ; 642 B->factor = 0; 643 B->lupivotthreshold = 1.0; 644 b->row = 0; 645 b->col = 0; 646 b->reallocs = 0; 647 648 b->m = m; 649 b->mbs = mbs; 650 b->n = n; 651 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 652 if (nnz == PETSC_NULL) { 653 if (nz == PETSC_DEFAULT) nz = 5; 654 else if (nz <= 0) nz = 1; 655 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 656 nz = nz*mbs; 657 } 658 else { 659 nz = 0; 660 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 661 } 662 663 /* allocate the matrix space */ 664 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 665 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 666 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 667 b->j = (int *) (b->a + nz*bs*bs); 668 PetscMemzero(b->j,nz*sizeof(int)); 669 b->i = b->j + nz; 670 b->singlemalloc = 1; 671 672 b->i[0] = 0; 673 for (i=1; i<mbs+1; i++) { 674 b->i[i] = b->i[i-1] + b->imax[i-1]; 675 } 676 677 /* b->ilen will count nonzeros in each block row so far. */ 678 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 679 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 680 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 681 682 b->bs = bs; 683 b->mbs = mbs; 684 b->nz = 0; 685 b->maxnz = nz; 686 b->sorted = 0; 687 b->roworiented = 1; 688 b->nonew = 0; 689 b->diag = 0; 690 b->solve_work = 0; 691 b->mult_work = 0; 692 b->spptr = 0; 693 694 *A = B; 695 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 696 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 697 return 0; 698 } 699 700 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 701 { 702 Mat C; 703 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 704 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 705 706 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 707 708 *B = 0; 709 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 710 PLogObjectCreate(C); 711 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 712 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 713 C->destroy = MatDestroy_SeqBAIJ; 714 C->view = MatView_SeqBAIJ; 715 C->factor = A->factor; 716 c->row = 0; 717 c->col = 0; 718 C->assembled = PETSC_TRUE; 719 720 c->m = a->m; 721 c->n = a->n; 722 c->bs = a->bs; 723 c->mbs = a->mbs; 724 c->nbs = a->nbs; 725 726 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 727 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 728 for ( i=0; i<mbs; i++ ) { 729 c->imax[i] = a->imax[i]; 730 c->ilen[i] = a->ilen[i]; 731 } 732 733 /* allocate the matrix space */ 734 c->singlemalloc = 1; 735 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 736 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 737 c->j = (int *) (c->a + nz*bs*bs); 738 c->i = c->j + nz; 739 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 740 if (mbs > 0) { 741 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 742 if (cpvalues == COPY_VALUES) { 743 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 744 } 745 } 746 747 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 748 c->sorted = a->sorted; 749 c->roworiented = a->roworiented; 750 c->nonew = a->nonew; 751 752 if (a->diag) { 753 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 754 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 755 for ( i=0; i<mbs; i++ ) { 756 c->diag[i] = a->diag[i]; 757 } 758 } 759 else c->diag = 0; 760 c->nz = a->nz; 761 c->maxnz = a->maxnz; 762 c->solve_work = 0; 763 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 764 c->mult_work = 0; 765 *B = C; 766 return 0; 767 } 768 769 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 770 { 771 Mat_SeqBAIJ *a; 772 Mat B; 773 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 774 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 775 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 776 int *masked, nmask,tmp,ishift,bs2; 777 Scalar *aa; 778 MPI_Comm comm = ((PetscObject) viewer)->comm; 779 780 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 781 bs2 = bs*bs; 782 783 MPI_Comm_size(comm,&size); 784 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 785 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 786 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 787 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 788 M = header[1]; N = header[2]; nz = header[3]; 789 790 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 791 792 /* 793 This code adds extra rows to make sure the number of rows is 794 divisible by the blocksize 795 */ 796 mbs = M/bs; 797 extra_rows = bs - M + bs*(mbs); 798 if (extra_rows == bs) extra_rows = 0; 799 else mbs++; 800 if (extra_rows) { 801 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 802 } 803 804 /* read in row lengths */ 805 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 806 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 807 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 808 809 /* read in column indices */ 810 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 811 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 812 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 813 814 /* loop over row lengths determining block row lengths */ 815 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 816 PetscMemzero(browlengths,mbs*sizeof(int)); 817 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 818 PetscMemzero(mask,mbs*sizeof(int)); 819 masked = mask + mbs; 820 rowcount = 0; nzcount = 0; 821 for ( i=0; i<mbs; i++ ) { 822 nmask = 0; 823 for ( j=0; j<bs; j++ ) { 824 kmax = rowlengths[rowcount]; 825 for ( k=0; k<kmax; k++ ) { 826 tmp = jj[nzcount++]/bs; 827 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 828 } 829 rowcount++; 830 } 831 browlengths[i] += nmask; 832 /* zero out the mask elements we set */ 833 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 834 } 835 836 /* create our matrix */ 837 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 838 CHKERRQ(ierr); 839 B = *A; 840 a = (Mat_SeqBAIJ *) B->data; 841 842 /* set matrix "i" values */ 843 a->i[0] = 0; 844 for ( i=1; i<= mbs; i++ ) { 845 a->i[i] = a->i[i-1] + browlengths[i-1]; 846 a->ilen[i-1] = browlengths[i-1]; 847 } 848 a->nz = 0; 849 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 850 851 /* read in nonzero values */ 852 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 853 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 854 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 855 856 /* set "a" and "j" values into matrix */ 857 nzcount = 0; jcount = 0; 858 for ( i=0; i<mbs; i++ ) { 859 nzcountb = nzcount; 860 nmask = 0; 861 for ( j=0; j<bs; j++ ) { 862 kmax = rowlengths[i*bs+j]; 863 for ( k=0; k<kmax; k++ ) { 864 tmp = jj[nzcount++]/bs; 865 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 866 } 867 rowcount++; 868 } 869 /* sort the masked values */ 870 PetscSortInt(nmask,masked); 871 872 /* set "j" values into matrix */ 873 maskcount = 1; 874 for ( j=0; j<nmask; j++ ) { 875 a->j[jcount++] = masked[j]; 876 mask[masked[j]] = maskcount++; 877 } 878 /* set "a" values into matrix */ 879 ishift = bs2*a->i[i]; 880 for ( j=0; j<bs; j++ ) { 881 kmax = rowlengths[i*bs+j]; 882 for ( k=0; k<kmax; k++ ) { 883 tmp = jj[nzcountb]/bs ; 884 block = mask[tmp] - 1; 885 point = jj[nzcountb] - bs*tmp; 886 idx = ishift + bs2*block + j + bs*point; 887 a->a[idx] = aa[nzcountb++]; 888 } 889 } 890 /* zero out the mask elements we set */ 891 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 892 } 893 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 894 895 PetscFree(rowlengths); 896 PetscFree(browlengths); 897 PetscFree(aa); 898 PetscFree(jj); 899 PetscFree(mask); 900 901 B->assembled = PETSC_TRUE; 902 903 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 904 if (flg) { 905 Viewer tviewer; 906 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 907 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 908 ierr = MatView(B,tviewer); CHKERRQ(ierr); 909 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 910 } 911 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 912 if (flg) { 913 Viewer tviewer; 914 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 915 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 916 ierr = MatView(B,tviewer); CHKERRQ(ierr); 917 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 918 } 919 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 920 if (flg) { 921 Viewer tviewer; 922 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 923 ierr = MatView(B,tviewer); CHKERRQ(ierr); 924 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 925 } 926 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 927 if (flg) { 928 Viewer tviewer; 929 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 930 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 931 ierr = MatView(B,tviewer); CHKERRQ(ierr); 932 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 933 } 934 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 935 if (flg) { 936 Viewer tviewer; 937 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 938 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 939 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 940 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 941 } 942 return 0; 943 } 944 945 946 947