1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.23 1996/03/31 16:51:11 bsmith Exp curfman $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs*bs; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs*bs*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 225 { 226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 227 *m = a->m; *n = a->n; 228 return 0; 229 } 230 231 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 232 { 233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 234 *m = 0; *n = a->m; 235 return 0; 236 } 237 238 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 239 { 240 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 241 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i; 242 Scalar *aa,*v_i,*aa_i; 243 244 bs = a->bs; 245 ai = a->i; 246 aj = a->j; 247 aa = a->a; 248 249 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 250 251 bn = row/bs; /* Block number */ 252 bp = row % bs; /* Block Position */ 253 M = ai[bn+1] - ai[bn]; 254 *nz = bs*M; 255 256 if (v) { 257 *v = 0; 258 if (*nz) { 259 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 260 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 261 v_i = *v + i*bs; 262 aa_i = aa + bs*bs*(ai[bn] + i); 263 for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];} 264 } 265 } 266 } 267 268 if (idx) { 269 *idx = 0; 270 if (*nz) { 271 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 272 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 273 idx_i = *idx + i*bs; 274 itmp = bs*aj[ai[bn] + i]; 275 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 276 } 277 } 278 } 279 return 0; 280 } 281 282 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 283 { 284 if (idx) {if (*idx) PetscFree(*idx);} 285 if (v) {if (*v) PetscFree(*v);} 286 return 0; 287 } 288 289 static int MatZeroEntries_SeqBAIJ(Mat A) 290 { 291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 292 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 293 return 0; 294 } 295 296 int MatDestroy_SeqBAIJ(PetscObject obj) 297 { 298 Mat A = (Mat) obj; 299 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 300 301 #if defined(PETSC_LOG) 302 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 303 #endif 304 PetscFree(a->a); 305 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 306 if (a->diag) PetscFree(a->diag); 307 if (a->ilen) PetscFree(a->ilen); 308 if (a->imax) PetscFree(a->imax); 309 if (a->solve_work) PetscFree(a->solve_work); 310 if (a->mult_work) PetscFree(a->mult_work); 311 PetscFree(a); 312 PLogObjectDestroy(A); 313 PetscHeaderDestroy(A); 314 return 0; 315 } 316 317 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 318 { 319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 320 if (op == ROW_ORIENTED) a->roworiented = 1; 321 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 322 else if (op == COLUMNS_SORTED) a->sorted = 1; 323 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 324 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 325 else if (op == ROWS_SORTED || 326 op == SYMMETRIC_MATRIX || 327 op == STRUCTURALLY_SYMMETRIC_MATRIX || 328 op == YES_NEW_DIAGONALS) 329 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 330 else if (op == NO_NEW_DIAGONALS) 331 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 332 else 333 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 334 return 0; 335 } 336 337 338 /* -------------------------------------------------------*/ 339 /* Should check that shapes of vectors and matrices match */ 340 /* -------------------------------------------------------*/ 341 #include "pinclude/plapack.h" 342 343 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 344 { 345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 346 Scalar *xg,*yg; 347 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 348 register Scalar x1,x2,x3,x4,x5; 349 int mbs = a->mbs, m = a->m, i, *idx,*ii; 350 int bs = a->bs,j,n,bs2 = bs*bs; 351 352 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 353 PetscMemzero(y,m*sizeof(Scalar)); 354 x = x; 355 idx = a->j; 356 v = a->a; 357 ii = a->i; 358 359 switch (bs) { 360 case 1: 361 for ( i=0; i<m; i++ ) { 362 n = ii[1] - ii[0]; ii++; 363 sum = 0.0; 364 while (n--) sum += *v++ * x[*idx++]; 365 y[i] = sum; 366 } 367 break; 368 case 2: 369 for ( i=0; i<mbs; i++ ) { 370 n = ii[1] - ii[0]; ii++; 371 sum1 = 0.0; sum2 = 0.0; 372 for ( j=0; j<n; j++ ) { 373 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 374 sum1 += v[0]*x1 + v[2]*x2; 375 sum2 += v[1]*x1 + v[3]*x2; 376 v += 4; 377 } 378 y[0] += sum1; y[1] += sum2; 379 y += 2; 380 } 381 break; 382 case 3: 383 for ( i=0; i<mbs; i++ ) { 384 n = ii[1] - ii[0]; ii++; 385 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 386 for ( j=0; j<n; j++ ) { 387 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 388 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 389 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 390 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 391 v += 9; 392 } 393 y[0] += sum1; y[1] += sum2; y[2] += sum3; 394 y += 3; 395 } 396 break; 397 case 4: 398 for ( i=0; i<mbs; i++ ) { 399 n = ii[1] - ii[0]; ii++; 400 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 401 for ( j=0; j<n; j++ ) { 402 xb = x + 4*(*idx++); 403 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 404 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 405 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 406 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 407 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 408 v += 16; 409 } 410 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 411 y += 4; 412 } 413 break; 414 case 5: 415 for ( i=0; i<mbs; i++ ) { 416 n = ii[1] - ii[0]; ii++; 417 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 418 for ( j=0; j<n; j++ ) { 419 xb = x + 5*(*idx++); 420 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 421 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 422 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 423 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 424 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 425 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 426 v += 25; 427 } 428 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 429 y += 5; 430 } 431 break; 432 /* block sizes larger then 5 by 5 are handled by BLAS */ 433 default: { 434 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 435 if (!a->mult_work) { 436 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 437 CHKPTRQ(a->mult_work); 438 } 439 work = a->mult_work; 440 for ( i=0; i<mbs; i++ ) { 441 n = ii[1] - ii[0]; ii++; 442 ncols = n*bs; 443 workt = work; 444 for ( j=0; j<n; j++ ) { 445 xb = x + bs*(*idx++); 446 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 447 workt += bs; 448 } 449 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 450 v += n*bs2; 451 y += bs; 452 } 453 } 454 } 455 PLogFlops(2*bs2*a->nz - m); 456 return 0; 457 } 458 459 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 460 { 461 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 462 if (nz) *nz = a->bs*a->bs*a->nz; 463 if (nza) *nza = a->maxnz; 464 if (mem) *mem = (int)A->mem; 465 return 0; 466 } 467 468 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 469 { 470 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 471 472 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 473 474 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 475 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 476 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 477 *flg = PETSC_FALSE; return 0; 478 } 479 480 /* if the a->i are the same */ 481 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 482 *flg = PETSC_FALSE; return 0; 483 } 484 485 /* if a->j are the same */ 486 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 487 *flg = PETSC_FALSE; return 0; 488 } 489 490 /* if a->a are the same */ 491 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 492 *flg = PETSC_FALSE; return 0; 493 } 494 *flg = PETSC_TRUE; 495 return 0; 496 497 } 498 499 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 500 { 501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 502 int i,j,k,n,row,bs,*ai,*aj,ambs; 503 Scalar *x, zero = 0.0,*aa,*aa_j; 504 505 bs = a->bs; 506 aa = a->a; 507 ai = a->i; 508 aj = a->j; 509 ambs = a->mbs; 510 511 VecSet(&zero,v); 512 VecGetArray(v,&x); VecGetLocalSize(v,&n); 513 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 514 for ( i=0; i<ambs; i++ ) { 515 for ( j=ai[i]; j<ai[i+1]; j++ ) { 516 if (aj[j] == i) { 517 row = i*bs; 518 aa_j = aa+j*bs*bs; 519 for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k]; 520 break; 521 } 522 } 523 } 524 return 0; 525 } 526 527 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 528 { 529 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 530 Scalar *l,*r,x,*v,*aa,*li,*ri; 531 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs; 532 533 ai = a->i; 534 aj = a->j; 535 aa = a->a; 536 m = a->m; 537 n = a->n; 538 bs = a->bs; 539 mbs = a->mbs; 540 541 if (ll) { 542 VecGetArray(ll,&l); VecGetSize(ll,&lm); 543 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 544 for ( i=0; i<mbs; i++ ) { /* for each block row */ 545 M = ai[i+1] - ai[i]; 546 li = l + i*bs; 547 v = aa + bs*bs*ai[i]; 548 for ( j=0; j<M; j++ ) { /* for each block */ 549 for ( k=0; k<bs*bs; k++ ) { 550 (*v++) *= li[k%bs]; 551 } 552 } 553 } 554 } 555 556 if (rr) { 557 VecGetArray(rr,&r); VecGetSize(rr,&rn); 558 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 559 for ( i=0; i<mbs; i++ ) { /* for each block row */ 560 M = ai[i+1] - ai[i]; 561 v = aa + bs*bs*ai[i]; 562 for ( j=0; j<M; j++ ) { /* for each block */ 563 ri = r + bs*aj[ai[i]+j]; 564 for ( k=0; k<bs; k++ ) { 565 x = ri[k]; 566 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 567 } 568 } 569 } 570 } 571 return 0; 572 } 573 574 575 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 576 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 577 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 578 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 579 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 580 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 581 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 582 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 583 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 584 585 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 586 { 587 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 588 Scalar *v = a->a; 589 double sum = 0.0; 590 int i; 591 592 if (type == NORM_FROBENIUS) { 593 for (i=0; i<a->nz; i++ ) { 594 #if defined(PETSC_COMPLEX) 595 sum += real(conj(*v)*(*v)); v++; 596 #else 597 sum += (*v)*(*v); v++; 598 #endif 599 } 600 *norm = sqrt(sum); 601 } 602 else { 603 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 604 } 605 return 0; 606 } 607 608 /* 609 note: This can only work for identity for row and col. It would 610 be good to check this and otherwise generate an error. 611 */ 612 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 613 { 614 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 615 Mat outA; 616 int ierr; 617 618 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 619 620 outA = inA; 621 inA->factor = FACTOR_LU; 622 a->row = row; 623 a->col = col; 624 625 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 626 627 if (!a->diag) { 628 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 629 } 630 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 631 return 0; 632 } 633 634 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 635 { 636 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 637 int one = 1, totalnz = a->bs*a->bs*a->nz; 638 BLscal_( &totalnz, alpha, a->a, &one ); 639 PLogFlops(totalnz); 640 return 0; 641 } 642 643 int MatPrintHelp_SeqBAIJ(Mat A) 644 { 645 static int called = 0; 646 647 if (called) return 0; else called = 1; 648 return 0; 649 } 650 /* -------------------------------------------------------------------*/ 651 static struct _MatOps MatOps = {0, 652 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 653 MatMult_SeqBAIJ,0, 654 0,0, 655 MatSolve_SeqBAIJ,0, 656 0,0, 657 MatLUFactor_SeqBAIJ,0, 658 0, 659 0, 660 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 661 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 662 0,0, 663 0, 664 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 665 MatGetReordering_SeqBAIJ, 666 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 667 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 668 MatILUFactorSymbolic_SeqBAIJ,0, 669 0,0,/* MatConvert_SeqBAIJ */ 0, 670 0,0, 671 MatConvertSameType_SeqBAIJ,0,0, 672 MatILUFactor_SeqBAIJ,0,0, 673 0,0, 674 0,0, 675 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 676 0}; 677 678 /*@C 679 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 680 (the default parallel PETSc format). For good matrix assembly performance 681 the user should preallocate the matrix storage by setting the parameter nz 682 (or nzz). By setting these parameters accurately, performance can be 683 increased by more than a factor of 50. 684 685 Input Parameters: 686 . comm - MPI communicator, set to MPI_COMM_SELF 687 . bs - size of block 688 . m - number of rows 689 . n - number of columns 690 . nz - number of block nonzeros per block row (same for all rows) 691 . nzz - number of block nonzeros per block row or PETSC_NULL 692 (possibly different for each row) 693 694 Output Parameter: 695 . A - the matrix 696 697 Notes: 698 The BAIJ format, is fully compatible with standard Fortran 77 699 storage. That is, the stored row and column indices can begin at 700 either one (as in Fortran) or zero. See the users manual for details. 701 702 Specify the preallocated storage with either nz or nnz (not both). 703 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 704 allocation. For additional details, see the users manual chapter on 705 matrices and the file $(PETSC_DIR)/Performance. 706 707 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 708 @*/ 709 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 710 { 711 Mat B; 712 Mat_SeqBAIJ *b; 713 int i,len,ierr, flg,mbs = m/bs; 714 715 if (mbs*bs != m) 716 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 717 718 *A = 0; 719 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 720 PLogObjectCreate(B); 721 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 722 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 723 switch (bs) { 724 case 1: 725 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 726 break; 727 case 2: 728 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 729 break; 730 case 3: 731 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 732 break; 733 case 4: 734 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 735 break; 736 case 5: 737 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 738 break; 739 } 740 B->destroy = MatDestroy_SeqBAIJ; 741 B->view = MatView_SeqBAIJ; 742 B->factor = 0; 743 B->lupivotthreshold = 1.0; 744 b->row = 0; 745 b->col = 0; 746 b->reallocs = 0; 747 748 b->m = m; 749 b->mbs = mbs; 750 b->n = n; 751 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 752 if (nnz == PETSC_NULL) { 753 if (nz == PETSC_DEFAULT) nz = 5; 754 else if (nz <= 0) nz = 1; 755 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 756 nz = nz*mbs; 757 } 758 else { 759 nz = 0; 760 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 761 } 762 763 /* allocate the matrix space */ 764 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 765 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 766 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 767 b->j = (int *) (b->a + nz*bs*bs); 768 PetscMemzero(b->j,nz*sizeof(int)); 769 b->i = b->j + nz; 770 b->singlemalloc = 1; 771 772 b->i[0] = 0; 773 for (i=1; i<mbs+1; i++) { 774 b->i[i] = b->i[i-1] + b->imax[i-1]; 775 } 776 777 /* b->ilen will count nonzeros in each block row so far. */ 778 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 779 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 780 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 781 782 b->bs = bs; 783 b->mbs = mbs; 784 b->nz = 0; 785 b->maxnz = nz; 786 b->sorted = 0; 787 b->roworiented = 1; 788 b->nonew = 0; 789 b->diag = 0; 790 b->solve_work = 0; 791 b->mult_work = 0; 792 b->spptr = 0; 793 794 *A = B; 795 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 796 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 797 return 0; 798 } 799 800 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 801 { 802 Mat C; 803 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 804 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 805 806 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 807 808 *B = 0; 809 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 810 PLogObjectCreate(C); 811 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 812 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 813 C->destroy = MatDestroy_SeqBAIJ; 814 C->view = MatView_SeqBAIJ; 815 C->factor = A->factor; 816 c->row = 0; 817 c->col = 0; 818 C->assembled = PETSC_TRUE; 819 820 c->m = a->m; 821 c->n = a->n; 822 c->bs = a->bs; 823 c->mbs = a->mbs; 824 c->nbs = a->nbs; 825 826 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 827 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 828 for ( i=0; i<mbs; i++ ) { 829 c->imax[i] = a->imax[i]; 830 c->ilen[i] = a->ilen[i]; 831 } 832 833 /* allocate the matrix space */ 834 c->singlemalloc = 1; 835 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 836 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 837 c->j = (int *) (c->a + nz*bs*bs); 838 c->i = c->j + nz; 839 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 840 if (mbs > 0) { 841 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 842 if (cpvalues == COPY_VALUES) { 843 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 844 } 845 } 846 847 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 848 c->sorted = a->sorted; 849 c->roworiented = a->roworiented; 850 c->nonew = a->nonew; 851 852 if (a->diag) { 853 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 854 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 855 for ( i=0; i<mbs; i++ ) { 856 c->diag[i] = a->diag[i]; 857 } 858 } 859 else c->diag = 0; 860 c->nz = a->nz; 861 c->maxnz = a->maxnz; 862 c->solve_work = 0; 863 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 864 c->mult_work = 0; 865 *B = C; 866 return 0; 867 } 868 869 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 870 { 871 Mat_SeqBAIJ *a; 872 Mat B; 873 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 874 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 875 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 876 int *masked, nmask,tmp,ishift,bs2; 877 Scalar *aa; 878 MPI_Comm comm = ((PetscObject) viewer)->comm; 879 880 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 881 bs2 = bs*bs; 882 883 MPI_Comm_size(comm,&size); 884 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 885 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 886 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 887 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 888 M = header[1]; N = header[2]; nz = header[3]; 889 890 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 891 892 /* 893 This code adds extra rows to make sure the number of rows is 894 divisible by the blocksize 895 */ 896 mbs = M/bs; 897 extra_rows = bs - M + bs*(mbs); 898 if (extra_rows == bs) extra_rows = 0; 899 else mbs++; 900 if (extra_rows) { 901 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 902 } 903 904 /* read in row lengths */ 905 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 906 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 907 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 908 909 /* read in column indices */ 910 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 911 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 912 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 913 914 /* loop over row lengths determining block row lengths */ 915 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 916 PetscMemzero(browlengths,mbs*sizeof(int)); 917 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 918 PetscMemzero(mask,mbs*sizeof(int)); 919 masked = mask + mbs; 920 rowcount = 0; nzcount = 0; 921 for ( i=0; i<mbs; i++ ) { 922 nmask = 0; 923 for ( j=0; j<bs; j++ ) { 924 kmax = rowlengths[rowcount]; 925 for ( k=0; k<kmax; k++ ) { 926 tmp = jj[nzcount++]/bs; 927 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 928 } 929 rowcount++; 930 } 931 browlengths[i] += nmask; 932 /* zero out the mask elements we set */ 933 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 934 } 935 936 /* create our matrix */ 937 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 938 CHKERRQ(ierr); 939 B = *A; 940 a = (Mat_SeqBAIJ *) B->data; 941 942 /* set matrix "i" values */ 943 a->i[0] = 0; 944 for ( i=1; i<= mbs; i++ ) { 945 a->i[i] = a->i[i-1] + browlengths[i-1]; 946 a->ilen[i-1] = browlengths[i-1]; 947 } 948 a->indexshift = 0; 949 a->nz = 0; 950 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 951 952 /* read in nonzero values */ 953 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 954 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 955 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 956 957 /* set "a" and "j" values into matrix */ 958 nzcount = 0; jcount = 0; 959 for ( i=0; i<mbs; i++ ) { 960 nzcountb = nzcount; 961 nmask = 0; 962 for ( j=0; j<bs; j++ ) { 963 kmax = rowlengths[i*bs+j]; 964 for ( k=0; k<kmax; k++ ) { 965 tmp = jj[nzcount++]/bs; 966 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 967 } 968 rowcount++; 969 } 970 /* sort the masked values */ 971 PetscSortInt(nmask,masked); 972 973 /* set "j" values into matrix */ 974 maskcount = 1; 975 for ( j=0; j<nmask; j++ ) { 976 a->j[jcount++] = masked[j]; 977 mask[masked[j]] = maskcount++; 978 } 979 /* set "a" values into matrix */ 980 ishift = bs2*a->i[i]; 981 for ( j=0; j<bs; j++ ) { 982 kmax = rowlengths[i*bs+j]; 983 for ( k=0; k<kmax; k++ ) { 984 tmp = jj[nzcountb]/bs ; 985 block = mask[tmp] - 1; 986 point = jj[nzcountb] - bs*tmp; 987 idx = ishift + bs2*block + j + bs*point; 988 a->a[idx] = aa[nzcountb++]; 989 } 990 } 991 /* zero out the mask elements we set */ 992 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 993 } 994 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 995 996 PetscFree(rowlengths); 997 PetscFree(browlengths); 998 PetscFree(aa); 999 PetscFree(jj); 1000 PetscFree(mask); 1001 1002 B->assembled = PETSC_TRUE; 1003 1004 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1005 if (flg) { 1006 Viewer tviewer; 1007 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1008 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1009 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1010 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1011 } 1012 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1013 if (flg) { 1014 Viewer tviewer; 1015 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1016 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1017 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1018 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1019 } 1020 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1021 if (flg) { 1022 Viewer tviewer; 1023 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1024 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1025 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1026 } 1027 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1028 if (flg) { 1029 Viewer tviewer; 1030 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1031 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1032 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1033 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1034 } 1035 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1036 if (flg) { 1037 Viewer tviewer; 1038 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1039 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1040 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1041 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1042 } 1043 return 0; 1044 } 1045 1046 1047 1048