1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.19 1996/03/26 18:56:10 bsmith Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs*bs; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs*bs*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 225 { 226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 227 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i; 228 Scalar *aa,*v_i,*aa_i; 229 230 bs = a->bs; 231 ai = a->i; 232 aj = a->j; 233 aa = a->a; 234 235 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 236 237 bn = row/bs; /* Block number */ 238 bp = row % bs; /* Block Position */ 239 M = ai[bn+1] - ai[bn]; 240 *nz = bs*M; 241 242 if (v) { 243 *v = 0; 244 if (*nz) { 245 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 246 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 247 v_i = *v + i*bs; 248 aa_i = aa + bs*bs*(ai[bn] + i); 249 for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];} 250 } 251 } 252 } 253 254 if (idx) { 255 *idx = 0; 256 if (*nz) { 257 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 258 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 259 idx_i = *idx + i*bs; 260 itmp = bs*aj[ai[bn] + i]; 261 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 262 } 263 } 264 } 265 return 0; 266 } 267 268 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 269 { 270 if (idx) {if (*idx) PetscFree(*idx);} 271 if (v) {if (*v) PetscFree(*v);} 272 return 0; 273 } 274 275 static int MatZeroEntries_SeqBAIJ(Mat A) 276 { 277 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 278 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 279 return 0; 280 } 281 282 int MatDestroy_SeqBAIJ(PetscObject obj) 283 { 284 Mat A = (Mat) obj; 285 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 286 287 #if defined(PETSC_LOG) 288 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 289 #endif 290 PetscFree(a->a); 291 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 292 if (a->diag) PetscFree(a->diag); 293 if (a->ilen) PetscFree(a->ilen); 294 if (a->imax) PetscFree(a->imax); 295 if (a->solve_work) PetscFree(a->solve_work); 296 if (a->mult_work) PetscFree(a->mult_work); 297 PetscFree(a); 298 return 0; 299 } 300 301 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 302 { 303 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 304 if (op == ROW_ORIENTED) a->roworiented = 1; 305 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 306 else if (op == COLUMNS_SORTED) a->sorted = 1; 307 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 308 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 309 else if (op == ROWS_SORTED || 310 op == SYMMETRIC_MATRIX || 311 op == STRUCTURALLY_SYMMETRIC_MATRIX || 312 op == YES_NEW_DIAGONALS) 313 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 314 else if (op == NO_NEW_DIAGONALS) 315 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 316 else 317 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 318 return 0; 319 } 320 321 322 /* -------------------------------------------------------*/ 323 /* Should check that shapes of vectors and matrices match */ 324 /* -------------------------------------------------------*/ 325 #include "pinclude/plapack.h" 326 327 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 328 { 329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 330 Scalar *xg,*yg; 331 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 332 register Scalar x1,x2,x3,x4,x5; 333 int mbs = a->mbs, m = a->m, i, *idx,*ii; 334 int bs = a->bs,j,n,bs2 = bs*bs; 335 336 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 337 PetscMemzero(y,m*sizeof(Scalar)); 338 x = x; 339 idx = a->j; 340 v = a->a; 341 ii = a->i; 342 343 switch (bs) { 344 case 1: 345 for ( i=0; i<m; i++ ) { 346 n = ii[1] - ii[0]; ii++; 347 sum = 0.0; 348 while (n--) sum += *v++ * x[*idx++]; 349 y[i] = sum; 350 } 351 break; 352 case 2: 353 for ( i=0; i<mbs; i++ ) { 354 n = ii[1] - ii[0]; ii++; 355 sum1 = 0.0; sum2 = 0.0; 356 for ( j=0; j<n; j++ ) { 357 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 358 sum1 += v[0]*x1 + v[2]*x2; 359 sum2 += v[1]*x1 + v[3]*x2; 360 v += 4; 361 } 362 y[0] += sum1; y[1] += sum2; 363 y += 2; 364 } 365 break; 366 case 3: 367 for ( i=0; i<mbs; i++ ) { 368 n = ii[1] - ii[0]; ii++; 369 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 370 for ( j=0; j<n; j++ ) { 371 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 372 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 373 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 374 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 375 v += 9; 376 } 377 y[0] += sum1; y[1] += sum2; y[2] += sum3; 378 y += 3; 379 } 380 break; 381 case 4: 382 for ( i=0; i<mbs; i++ ) { 383 n = ii[1] - ii[0]; ii++; 384 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 385 for ( j=0; j<n; j++ ) { 386 xb = x + 4*(*idx++); 387 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 388 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 389 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 390 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 391 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 392 v += 16; 393 } 394 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 395 y += 4; 396 } 397 break; 398 case 5: 399 for ( i=0; i<mbs; i++ ) { 400 n = ii[1] - ii[0]; ii++; 401 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 402 for ( j=0; j<n; j++ ) { 403 xb = x + 5*(*idx++); 404 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 405 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 406 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 407 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 408 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 409 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 410 v += 25; 411 } 412 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 413 y += 5; 414 } 415 break; 416 /* block sizes larger then 5 by 5 are handled by BLAS */ 417 default: { 418 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 419 if (!a->mult_work) { 420 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 421 CHKPTRQ(a->mult_work); 422 } 423 work = a->mult_work; 424 for ( i=0; i<mbs; i++ ) { 425 n = ii[1] - ii[0]; ii++; 426 ncols = n*bs; 427 workt = work; 428 for ( j=0; j<n; j++ ) { 429 xb = x + bs*(*idx++); 430 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 431 workt += bs; 432 } 433 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 434 v += n*bs2; 435 y += bs; 436 } 437 } 438 } 439 PLogFlops(2*bs2*a->nz - m); 440 return 0; 441 } 442 443 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 444 { 445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 446 if (nz) *nz = a->bs*a->bs*a->nz; 447 if (nza) *nza = a->maxnz; 448 if (mem) *mem = (int)A->mem; 449 return 0; 450 } 451 452 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 453 { 454 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 455 456 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 457 458 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 459 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 460 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 461 *flg = PETSC_FALSE; return 0; 462 } 463 464 /* if the a->i are the same */ 465 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 466 *flg = PETSC_FALSE; return 0; 467 } 468 469 /* if a->j are the same */ 470 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 471 *flg = PETSC_FALSE; return 0; 472 } 473 474 /* if a->a are the same */ 475 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 476 *flg = PETSC_FALSE; return 0; 477 } 478 *flg = PETSC_TRUE; 479 return 0; 480 481 } 482 483 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 484 { 485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 486 int i,j,k,n,row,bs,*ai,*aj,ambs; 487 Scalar *x, zero = 0.0,*aa,*aa_j; 488 489 bs = a->bs; 490 aa = a->a; 491 ai = a->i; 492 aj = a->j; 493 ambs = a->mbs; 494 495 VecSet(&zero,v); 496 VecGetArray(v,&x); VecGetLocalSize(v,&n); 497 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 498 for ( i=0; i<ambs; i++ ) { 499 for ( j=ai[i]; j<ai[i+1]; j++ ) { 500 if (aj[j] == i) { 501 row = i*bs; 502 aa_j = aa+j*bs*bs; 503 for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k]; 504 break; 505 } 506 } 507 } 508 return 0; 509 } 510 511 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 512 { 513 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 514 Scalar *l,*r,x,*v,*aa,*li,*ri; 515 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs; 516 517 ai = a->i; 518 aj = a->j; 519 aa = a->a; 520 m = a->m; 521 n = a->n; 522 bs = a->bs; 523 mbs = a->mbs; 524 525 if (ll) { 526 VecGetArray(ll,&l); VecGetSize(ll,&lm); 527 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 528 for ( i=0; i<mbs; i++ ) { /* for each block row */ 529 M = ai[i+1] - ai[i]; 530 li = l + i*bs; 531 v = aa + bs*bs*ai[i]; 532 for ( j=0; j<M; j++ ) { /* for each block */ 533 for ( k=0; k<bs*bs; k++ ) { 534 (*v++) *= li[k%bs]; 535 } 536 } 537 } 538 } 539 540 if (rr) { 541 VecGetArray(rr,&r); VecGetSize(rr,&rn); 542 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 543 for ( i=0; i<mbs; i++ ) { /* for each block row */ 544 M = ai[i+1] - ai[i]; 545 v = aa + bs*bs*ai[i]; 546 for ( j=0; j<M; j++ ) { /* for each block */ 547 ri = r + bs*aj[ai[i]+j]; 548 for ( k=0; k<bs; k++ ) { 549 x = ri[k]; 550 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 551 } 552 } 553 } 554 } 555 return 0; 556 } 557 558 559 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 560 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 561 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 562 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 563 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 564 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 565 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 566 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 567 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 568 569 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 570 { 571 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 572 *m = a->m; *n = a->n; 573 return 0; 574 } 575 576 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 577 { 578 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 579 *m = 0; *n = a->m; 580 return 0; 581 } 582 583 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 584 { 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 586 Scalar *v = a->a; 587 double sum = 0.0; 588 int i; 589 590 if (type == NORM_FROBENIUS) { 591 for (i=0; i<a->nz; i++ ) { 592 #if defined(PETSC_COMPLEX) 593 sum += real(conj(*v)*(*v)); v++; 594 #else 595 sum += (*v)*(*v); v++; 596 #endif 597 } 598 *norm = sqrt(sum); 599 } 600 else { 601 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 602 } 603 return 0; 604 } 605 606 /* 607 note: This can only work for identity for row and col. It would 608 be good to check this and otherwise generate an error. 609 */ 610 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 611 { 612 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 613 Mat outA; 614 int ierr; 615 616 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 617 618 outA = inA; 619 inA->factor = FACTOR_LU; 620 a->row = row; 621 a->col = col; 622 623 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 624 625 if (!a->diag) { 626 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 627 } 628 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 629 return 0; 630 } 631 632 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 633 { 634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 635 int one = 1, totalnz = a->bs*a->bs*a->nz; 636 BLscal_( &totalnz, alpha, a->a, &one ); 637 PLogFlops(totalnz); 638 return 0; 639 } 640 641 int MatPrintHelp_SeqBAIJ(Mat A) 642 { 643 static int called = 0; 644 645 if (called) return 0; else called = 1; 646 return 0; 647 } 648 /* -------------------------------------------------------------------*/ 649 static struct _MatOps MatOps = {0, 650 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 651 MatMult_SeqBAIJ,0, 652 0,0, 653 MatSolve_SeqBAIJ,0, 654 0,0, 655 MatLUFactor_SeqBAIJ,0, 656 0, 657 0, 658 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 659 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 660 0,0, 661 0, 662 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 663 MatGetReordering_SeqBAIJ, 664 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 665 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 666 MatILUFactorSymbolic_SeqBAIJ,0, 667 0,0,/* MatConvert_SeqBAIJ */ 0, 668 0,0, 669 MatConvertSameType_SeqBAIJ,0,0, 670 MatILUFactor_SeqBAIJ,0,0, 671 0,0, 672 0,0, 673 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 674 0}; 675 676 /*@C 677 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 678 (the default parallel PETSc format). For good matrix assembly performance 679 the user should preallocate the matrix storage by setting the parameter nz 680 (or nzz). By setting these parameters accurately, performance can be 681 increased by more than a factor of 50. 682 683 Input Parameters: 684 . comm - MPI communicator, set to MPI_COMM_SELF 685 . bs - size of block 686 . m - number of rows 687 . n - number of columns 688 . nz - number of block nonzeros per block row (same for all rows) 689 . nzz - number of block nonzeros per block row or PETSC_NULL 690 (possibly different for each row) 691 692 Output Parameter: 693 . A - the matrix 694 695 Notes: 696 The BAIJ format, is fully compatible with standard Fortran 77 697 storage. That is, the stored row and column indices can begin at 698 either one (as in Fortran) or zero. See the users manual for details. 699 700 Specify the preallocated storage with either nz or nnz (not both). 701 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 702 allocation. For additional details, see the users manual chapter on 703 matrices and the file $(PETSC_DIR)/Performance. 704 705 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 706 @*/ 707 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 708 { 709 Mat B; 710 Mat_SeqBAIJ *b; 711 int i,len,ierr, flg,mbs = m/bs; 712 713 if (mbs*bs != m) 714 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 715 716 *A = 0; 717 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 718 PLogObjectCreate(B); 719 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 720 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 721 switch (bs) { 722 case 1: 723 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 724 break; 725 case 2: 726 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 727 break; 728 case 3: 729 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 730 break; 731 case 4: 732 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 733 break; 734 case 5: 735 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 736 break; 737 } 738 B->destroy = MatDestroy_SeqBAIJ; 739 B->view = MatView_SeqBAIJ; 740 B->factor = 0; 741 B->lupivotthreshold = 1.0; 742 b->row = 0; 743 b->col = 0; 744 b->reallocs = 0; 745 746 b->m = m; 747 b->mbs = mbs; 748 b->n = n; 749 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 750 if (nnz == PETSC_NULL) { 751 if (nz == PETSC_DEFAULT) nz = 5; 752 else if (nz <= 0) nz = 1; 753 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 754 nz = nz*mbs; 755 } 756 else { 757 nz = 0; 758 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 759 } 760 761 /* allocate the matrix space */ 762 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 763 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 764 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 765 b->j = (int *) (b->a + nz*bs*bs); 766 PetscMemzero(b->j,nz*sizeof(int)); 767 b->i = b->j + nz; 768 b->singlemalloc = 1; 769 770 b->i[0] = 0; 771 for (i=1; i<mbs+1; i++) { 772 b->i[i] = b->i[i-1] + b->imax[i-1]; 773 } 774 775 /* b->ilen will count nonzeros in each block row so far. */ 776 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 777 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 778 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 779 780 b->bs = bs; 781 b->mbs = mbs; 782 b->nz = 0; 783 b->maxnz = nz; 784 b->sorted = 0; 785 b->roworiented = 1; 786 b->nonew = 0; 787 b->diag = 0; 788 b->solve_work = 0; 789 b->mult_work = 0; 790 b->spptr = 0; 791 792 *A = B; 793 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 794 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 795 return 0; 796 } 797 798 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 799 { 800 Mat C; 801 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 802 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 803 804 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 805 806 *B = 0; 807 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 808 PLogObjectCreate(C); 809 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 810 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 811 C->destroy = MatDestroy_SeqBAIJ; 812 C->view = MatView_SeqBAIJ; 813 C->factor = A->factor; 814 c->row = 0; 815 c->col = 0; 816 C->assembled = PETSC_TRUE; 817 818 c->m = a->m; 819 c->n = a->n; 820 c->bs = a->bs; 821 c->mbs = a->mbs; 822 c->nbs = a->nbs; 823 824 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 825 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 826 for ( i=0; i<mbs; i++ ) { 827 c->imax[i] = a->imax[i]; 828 c->ilen[i] = a->ilen[i]; 829 } 830 831 /* allocate the matrix space */ 832 c->singlemalloc = 1; 833 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 834 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 835 c->j = (int *) (c->a + nz*bs*bs); 836 c->i = c->j + nz; 837 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 838 if (mbs > 0) { 839 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 840 if (cpvalues == COPY_VALUES) { 841 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 842 } 843 } 844 845 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 846 c->sorted = a->sorted; 847 c->roworiented = a->roworiented; 848 c->nonew = a->nonew; 849 850 if (a->diag) { 851 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 852 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 853 for ( i=0; i<mbs; i++ ) { 854 c->diag[i] = a->diag[i]; 855 } 856 } 857 else c->diag = 0; 858 c->nz = a->nz; 859 c->maxnz = a->maxnz; 860 c->solve_work = 0; 861 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 862 c->mult_work = 0; 863 *B = C; 864 return 0; 865 } 866 867 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 868 { 869 Mat_SeqBAIJ *a; 870 Mat B; 871 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 872 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 873 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 874 int *masked, nmask,tmp,ishift,bs2; 875 Scalar *aa; 876 MPI_Comm comm = ((PetscObject) viewer)->comm; 877 878 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 879 bs2 = bs*bs; 880 881 MPI_Comm_size(comm,&size); 882 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 883 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 884 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 885 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 886 M = header[1]; N = header[2]; nz = header[3]; 887 888 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 889 890 /* 891 This code adds extra rows to make sure the number of rows is 892 divisible by the blocksize 893 */ 894 mbs = M/bs; 895 extra_rows = bs - M + bs*(mbs); 896 if (extra_rows == bs) extra_rows = 0; 897 else mbs++; 898 if (extra_rows) { 899 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 900 } 901 902 /* read in row lengths */ 903 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 904 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 905 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 906 907 /* read in column indices */ 908 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 909 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 910 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 911 912 /* loop over row lengths determining block row lengths */ 913 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 914 PetscMemzero(browlengths,mbs*sizeof(int)); 915 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 916 PetscMemzero(mask,mbs*sizeof(int)); 917 masked = mask + mbs; 918 rowcount = 0; nzcount = 0; 919 for ( i=0; i<mbs; i++ ) { 920 nmask = 0; 921 for ( j=0; j<bs; j++ ) { 922 kmax = rowlengths[rowcount]; 923 for ( k=0; k<kmax; k++ ) { 924 tmp = jj[nzcount++]/bs; 925 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 926 } 927 rowcount++; 928 } 929 browlengths[i] += nmask; 930 /* zero out the mask elements we set */ 931 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 932 } 933 934 /* create our matrix */ 935 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 936 CHKERRQ(ierr); 937 B = *A; 938 a = (Mat_SeqBAIJ *) B->data; 939 940 /* set matrix "i" values */ 941 a->i[0] = 0; 942 for ( i=1; i<= mbs; i++ ) { 943 a->i[i] = a->i[i-1] + browlengths[i-1]; 944 a->ilen[i-1] = browlengths[i-1]; 945 } 946 a->indexshift = 0; 947 a->nz = 0; 948 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 949 950 /* read in nonzero values */ 951 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 952 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 953 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 954 955 /* set "a" and "j" values into matrix */ 956 nzcount = 0; jcount = 0; 957 for ( i=0; i<mbs; i++ ) { 958 nzcountb = nzcount; 959 nmask = 0; 960 for ( j=0; j<bs; j++ ) { 961 kmax = rowlengths[i*bs+j]; 962 for ( k=0; k<kmax; k++ ) { 963 tmp = jj[nzcount++]/bs; 964 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 965 } 966 rowcount++; 967 } 968 /* sort the masked values */ 969 PetscSortInt(nmask,masked); 970 971 /* set "j" values into matrix */ 972 maskcount = 1; 973 for ( j=0; j<nmask; j++ ) { 974 a->j[jcount++] = masked[j]; 975 mask[masked[j]] = maskcount++; 976 } 977 /* set "a" values into matrix */ 978 ishift = bs2*a->i[i]; 979 for ( j=0; j<bs; j++ ) { 980 kmax = rowlengths[i*bs+j]; 981 for ( k=0; k<kmax; k++ ) { 982 tmp = jj[nzcountb]/bs ; 983 block = mask[tmp] - 1; 984 point = jj[nzcountb] - bs*tmp; 985 idx = ishift + bs2*block + j + bs*point; 986 a->a[idx] = aa[nzcountb++]; 987 } 988 } 989 /* zero out the mask elements we set */ 990 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 991 } 992 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 993 994 PetscFree(rowlengths); 995 PetscFree(browlengths); 996 PetscFree(aa); 997 PetscFree(jj); 998 PetscFree(mask); 999 1000 B->assembled = PETSC_TRUE; 1001 1002 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1003 if (flg) { 1004 Viewer tviewer; 1005 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1006 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1007 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1008 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1009 } 1010 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1011 if (flg) { 1012 Viewer tviewer; 1013 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1014 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1015 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1016 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1017 } 1018 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1019 if (flg) { 1020 Viewer tviewer; 1021 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1022 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1023 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1024 } 1025 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1026 if (flg) { 1027 Viewer tviewer; 1028 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1029 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1030 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1031 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1032 } 1033 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1034 if (flg) { 1035 Viewer tviewer; 1036 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1037 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1038 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1039 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1040 } 1041 return 0; 1042 } 1043 1044 1045 1046