1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.20 1996/03/27 00:06:28 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs*bs; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs*bs*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 225 { 226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 227 *m = a->m; *n = a->n; 228 return 0; 229 } 230 231 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 232 { 233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 234 *m = 0; *n = a->m; 235 return 0; 236 } 237 238 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 239 { 240 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 241 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i; 242 Scalar *aa,*v_i,*aa_i; 243 244 bs = a->bs; 245 ai = a->i; 246 aj = a->j; 247 aa = a->a; 248 249 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 250 251 bn = row/bs; /* Block number */ 252 bp = row % bs; /* Block Position */ 253 M = ai[bn+1] - ai[bn]; 254 *nz = bs*M; 255 256 if (v) { 257 *v = 0; 258 if (*nz) { 259 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 260 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 261 v_i = *v + i*bs; 262 aa_i = aa + bs*bs*(ai[bn] + i); 263 for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];} 264 } 265 } 266 } 267 268 if (idx) { 269 *idx = 0; 270 if (*nz) { 271 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 272 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 273 idx_i = *idx + i*bs; 274 itmp = bs*aj[ai[bn] + i]; 275 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 276 } 277 } 278 } 279 return 0; 280 } 281 282 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 283 { 284 if (idx) {if (*idx) PetscFree(*idx);} 285 if (v) {if (*v) PetscFree(*v);} 286 return 0; 287 } 288 289 static int MatZeroEntries_SeqBAIJ(Mat A) 290 { 291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 292 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 293 return 0; 294 } 295 296 int MatDestroy_SeqBAIJ(PetscObject obj) 297 { 298 Mat A = (Mat) obj; 299 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 300 301 #if defined(PETSC_LOG) 302 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 303 #endif 304 PetscFree(a->a); 305 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 306 if (a->diag) PetscFree(a->diag); 307 if (a->ilen) PetscFree(a->ilen); 308 if (a->imax) PetscFree(a->imax); 309 if (a->solve_work) PetscFree(a->solve_work); 310 if (a->mult_work) PetscFree(a->mult_work); 311 PetscFree(a); 312 return 0; 313 } 314 315 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 316 { 317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 318 if (op == ROW_ORIENTED) a->roworiented = 1; 319 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 320 else if (op == COLUMNS_SORTED) a->sorted = 1; 321 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 322 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 323 else if (op == ROWS_SORTED || 324 op == SYMMETRIC_MATRIX || 325 op == STRUCTURALLY_SYMMETRIC_MATRIX || 326 op == YES_NEW_DIAGONALS) 327 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 328 else if (op == NO_NEW_DIAGONALS) 329 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 330 else 331 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 332 return 0; 333 } 334 335 336 /* -------------------------------------------------------*/ 337 /* Should check that shapes of vectors and matrices match */ 338 /* -------------------------------------------------------*/ 339 #include "pinclude/plapack.h" 340 341 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 342 { 343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 344 Scalar *xg,*yg; 345 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 346 register Scalar x1,x2,x3,x4,x5; 347 int mbs = a->mbs, m = a->m, i, *idx,*ii; 348 int bs = a->bs,j,n,bs2 = bs*bs; 349 350 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 351 PetscMemzero(y,m*sizeof(Scalar)); 352 x = x; 353 idx = a->j; 354 v = a->a; 355 ii = a->i; 356 357 switch (bs) { 358 case 1: 359 for ( i=0; i<m; i++ ) { 360 n = ii[1] - ii[0]; ii++; 361 sum = 0.0; 362 while (n--) sum += *v++ * x[*idx++]; 363 y[i] = sum; 364 } 365 break; 366 case 2: 367 for ( i=0; i<mbs; i++ ) { 368 n = ii[1] - ii[0]; ii++; 369 sum1 = 0.0; sum2 = 0.0; 370 for ( j=0; j<n; j++ ) { 371 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 372 sum1 += v[0]*x1 + v[2]*x2; 373 sum2 += v[1]*x1 + v[3]*x2; 374 v += 4; 375 } 376 y[0] += sum1; y[1] += sum2; 377 y += 2; 378 } 379 break; 380 case 3: 381 for ( i=0; i<mbs; i++ ) { 382 n = ii[1] - ii[0]; ii++; 383 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 384 for ( j=0; j<n; j++ ) { 385 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 386 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 387 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 388 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 y[0] += sum1; y[1] += sum2; y[2] += sum3; 392 y += 3; 393 } 394 break; 395 case 4: 396 for ( i=0; i<mbs; i++ ) { 397 n = ii[1] - ii[0]; ii++; 398 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 399 for ( j=0; j<n; j++ ) { 400 xb = x + 4*(*idx++); 401 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 402 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 403 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 404 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 405 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 406 v += 16; 407 } 408 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 409 y += 4; 410 } 411 break; 412 case 5: 413 for ( i=0; i<mbs; i++ ) { 414 n = ii[1] - ii[0]; ii++; 415 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 416 for ( j=0; j<n; j++ ) { 417 xb = x + 5*(*idx++); 418 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 419 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 420 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 421 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 422 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 423 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 424 v += 25; 425 } 426 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 427 y += 5; 428 } 429 break; 430 /* block sizes larger then 5 by 5 are handled by BLAS */ 431 default: { 432 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 433 if (!a->mult_work) { 434 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 435 CHKPTRQ(a->mult_work); 436 } 437 work = a->mult_work; 438 for ( i=0; i<mbs; i++ ) { 439 n = ii[1] - ii[0]; ii++; 440 ncols = n*bs; 441 workt = work; 442 for ( j=0; j<n; j++ ) { 443 xb = x + bs*(*idx++); 444 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 445 workt += bs; 446 } 447 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 448 v += n*bs2; 449 y += bs; 450 } 451 } 452 } 453 PLogFlops(2*bs2*a->nz - m); 454 return 0; 455 } 456 457 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 458 { 459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 460 if (nz) *nz = a->bs*a->bs*a->nz; 461 if (nza) *nza = a->maxnz; 462 if (mem) *mem = (int)A->mem; 463 return 0; 464 } 465 466 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 467 { 468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 469 470 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 471 472 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 473 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 474 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 475 *flg = PETSC_FALSE; return 0; 476 } 477 478 /* if the a->i are the same */ 479 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 480 *flg = PETSC_FALSE; return 0; 481 } 482 483 /* if a->j are the same */ 484 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 485 *flg = PETSC_FALSE; return 0; 486 } 487 488 /* if a->a are the same */ 489 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 490 *flg = PETSC_FALSE; return 0; 491 } 492 *flg = PETSC_TRUE; 493 return 0; 494 495 } 496 497 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 498 { 499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 500 int i,j,k,n,row,bs,*ai,*aj,ambs; 501 Scalar *x, zero = 0.0,*aa,*aa_j; 502 503 bs = a->bs; 504 aa = a->a; 505 ai = a->i; 506 aj = a->j; 507 ambs = a->mbs; 508 509 VecSet(&zero,v); 510 VecGetArray(v,&x); VecGetLocalSize(v,&n); 511 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 512 for ( i=0; i<ambs; i++ ) { 513 for ( j=ai[i]; j<ai[i+1]; j++ ) { 514 if (aj[j] == i) { 515 row = i*bs; 516 aa_j = aa+j*bs*bs; 517 for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k]; 518 break; 519 } 520 } 521 } 522 return 0; 523 } 524 525 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 526 { 527 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 528 Scalar *l,*r,x,*v,*aa,*li,*ri; 529 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs; 530 531 ai = a->i; 532 aj = a->j; 533 aa = a->a; 534 m = a->m; 535 n = a->n; 536 bs = a->bs; 537 mbs = a->mbs; 538 539 if (ll) { 540 VecGetArray(ll,&l); VecGetSize(ll,&lm); 541 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 542 for ( i=0; i<mbs; i++ ) { /* for each block row */ 543 M = ai[i+1] - ai[i]; 544 li = l + i*bs; 545 v = aa + bs*bs*ai[i]; 546 for ( j=0; j<M; j++ ) { /* for each block */ 547 for ( k=0; k<bs*bs; k++ ) { 548 (*v++) *= li[k%bs]; 549 } 550 } 551 } 552 } 553 554 if (rr) { 555 VecGetArray(rr,&r); VecGetSize(rr,&rn); 556 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 557 for ( i=0; i<mbs; i++ ) { /* for each block row */ 558 M = ai[i+1] - ai[i]; 559 v = aa + bs*bs*ai[i]; 560 for ( j=0; j<M; j++ ) { /* for each block */ 561 ri = r + bs*aj[ai[i]+j]; 562 for ( k=0; k<bs; k++ ) { 563 x = ri[k]; 564 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 565 } 566 } 567 } 568 } 569 return 0; 570 } 571 572 573 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 574 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 575 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 576 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 577 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 578 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 579 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 580 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 581 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 582 583 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 584 { 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 586 *m = a->m; *n = a->n; 587 return 0; 588 } 589 590 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 591 { 592 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 593 *m = 0; *n = a->m; 594 return 0; 595 } 596 597 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 598 { 599 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 600 Scalar *v = a->a; 601 double sum = 0.0; 602 int i; 603 604 if (type == NORM_FROBENIUS) { 605 for (i=0; i<a->nz; i++ ) { 606 #if defined(PETSC_COMPLEX) 607 sum += real(conj(*v)*(*v)); v++; 608 #else 609 sum += (*v)*(*v); v++; 610 #endif 611 } 612 *norm = sqrt(sum); 613 } 614 else { 615 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 616 } 617 return 0; 618 } 619 620 /* 621 note: This can only work for identity for row and col. It would 622 be good to check this and otherwise generate an error. 623 */ 624 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 625 { 626 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 627 Mat outA; 628 int ierr; 629 630 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 631 632 outA = inA; 633 inA->factor = FACTOR_LU; 634 a->row = row; 635 a->col = col; 636 637 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 638 639 if (!a->diag) { 640 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 641 } 642 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 643 return 0; 644 } 645 646 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 647 { 648 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 649 int one = 1, totalnz = a->bs*a->bs*a->nz; 650 BLscal_( &totalnz, alpha, a->a, &one ); 651 PLogFlops(totalnz); 652 return 0; 653 } 654 655 int MatPrintHelp_SeqBAIJ(Mat A) 656 { 657 static int called = 0; 658 659 if (called) return 0; else called = 1; 660 return 0; 661 } 662 /* -------------------------------------------------------------------*/ 663 static struct _MatOps MatOps = {0, 664 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 665 MatMult_SeqBAIJ,0, 666 0,0, 667 MatSolve_SeqBAIJ,0, 668 0,0, 669 MatLUFactor_SeqBAIJ,0, 670 0, 671 0, 672 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 673 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 674 0,0, 675 0, 676 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 677 MatGetReordering_SeqBAIJ, 678 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 679 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 680 MatILUFactorSymbolic_SeqBAIJ,0, 681 0,0,/* MatConvert_SeqBAIJ */ 0, 682 0,0, 683 MatConvertSameType_SeqBAIJ,0,0, 684 MatILUFactor_SeqBAIJ,0,0, 685 0,0, 686 0,0, 687 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 688 0}; 689 690 /*@C 691 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 692 (the default parallel PETSc format). For good matrix assembly performance 693 the user should preallocate the matrix storage by setting the parameter nz 694 (or nzz). By setting these parameters accurately, performance can be 695 increased by more than a factor of 50. 696 697 Input Parameters: 698 . comm - MPI communicator, set to MPI_COMM_SELF 699 . bs - size of block 700 . m - number of rows 701 . n - number of columns 702 . nz - number of block nonzeros per block row (same for all rows) 703 . nzz - number of block nonzeros per block row or PETSC_NULL 704 (possibly different for each row) 705 706 Output Parameter: 707 . A - the matrix 708 709 Notes: 710 The BAIJ format, is fully compatible with standard Fortran 77 711 storage. That is, the stored row and column indices can begin at 712 either one (as in Fortran) or zero. See the users manual for details. 713 714 Specify the preallocated storage with either nz or nnz (not both). 715 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 716 allocation. For additional details, see the users manual chapter on 717 matrices and the file $(PETSC_DIR)/Performance. 718 719 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 720 @*/ 721 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 722 { 723 Mat B; 724 Mat_SeqBAIJ *b; 725 int i,len,ierr, flg,mbs = m/bs; 726 727 if (mbs*bs != m) 728 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 729 730 *A = 0; 731 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 732 PLogObjectCreate(B); 733 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 734 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 735 switch (bs) { 736 case 1: 737 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 738 break; 739 case 2: 740 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 741 break; 742 case 3: 743 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 744 break; 745 case 4: 746 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 747 break; 748 case 5: 749 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 750 break; 751 } 752 B->destroy = MatDestroy_SeqBAIJ; 753 B->view = MatView_SeqBAIJ; 754 B->factor = 0; 755 B->lupivotthreshold = 1.0; 756 b->row = 0; 757 b->col = 0; 758 b->reallocs = 0; 759 760 b->m = m; 761 b->mbs = mbs; 762 b->n = n; 763 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 764 if (nnz == PETSC_NULL) { 765 if (nz == PETSC_DEFAULT) nz = 5; 766 else if (nz <= 0) nz = 1; 767 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 768 nz = nz*mbs; 769 } 770 else { 771 nz = 0; 772 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 773 } 774 775 /* allocate the matrix space */ 776 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 777 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 778 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 779 b->j = (int *) (b->a + nz*bs*bs); 780 PetscMemzero(b->j,nz*sizeof(int)); 781 b->i = b->j + nz; 782 b->singlemalloc = 1; 783 784 b->i[0] = 0; 785 for (i=1; i<mbs+1; i++) { 786 b->i[i] = b->i[i-1] + b->imax[i-1]; 787 } 788 789 /* b->ilen will count nonzeros in each block row so far. */ 790 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 791 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 792 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 793 794 b->bs = bs; 795 b->mbs = mbs; 796 b->nz = 0; 797 b->maxnz = nz; 798 b->sorted = 0; 799 b->roworiented = 1; 800 b->nonew = 0; 801 b->diag = 0; 802 b->solve_work = 0; 803 b->mult_work = 0; 804 b->spptr = 0; 805 806 *A = B; 807 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 808 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 809 return 0; 810 } 811 812 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 813 { 814 Mat C; 815 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 816 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 817 818 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 819 820 *B = 0; 821 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 822 PLogObjectCreate(C); 823 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 824 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 825 C->destroy = MatDestroy_SeqBAIJ; 826 C->view = MatView_SeqBAIJ; 827 C->factor = A->factor; 828 c->row = 0; 829 c->col = 0; 830 C->assembled = PETSC_TRUE; 831 832 c->m = a->m; 833 c->n = a->n; 834 c->bs = a->bs; 835 c->mbs = a->mbs; 836 c->nbs = a->nbs; 837 838 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 839 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 840 for ( i=0; i<mbs; i++ ) { 841 c->imax[i] = a->imax[i]; 842 c->ilen[i] = a->ilen[i]; 843 } 844 845 /* allocate the matrix space */ 846 c->singlemalloc = 1; 847 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 848 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 849 c->j = (int *) (c->a + nz*bs*bs); 850 c->i = c->j + nz; 851 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 852 if (mbs > 0) { 853 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 854 if (cpvalues == COPY_VALUES) { 855 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 856 } 857 } 858 859 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 860 c->sorted = a->sorted; 861 c->roworiented = a->roworiented; 862 c->nonew = a->nonew; 863 864 if (a->diag) { 865 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 866 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 867 for ( i=0; i<mbs; i++ ) { 868 c->diag[i] = a->diag[i]; 869 } 870 } 871 else c->diag = 0; 872 c->nz = a->nz; 873 c->maxnz = a->maxnz; 874 c->solve_work = 0; 875 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 876 c->mult_work = 0; 877 *B = C; 878 return 0; 879 } 880 881 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 882 { 883 Mat_SeqBAIJ *a; 884 Mat B; 885 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 886 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 887 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 888 int *masked, nmask,tmp,ishift,bs2; 889 Scalar *aa; 890 MPI_Comm comm = ((PetscObject) viewer)->comm; 891 892 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 893 bs2 = bs*bs; 894 895 MPI_Comm_size(comm,&size); 896 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 897 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 898 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 899 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 900 M = header[1]; N = header[2]; nz = header[3]; 901 902 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 903 904 /* 905 This code adds extra rows to make sure the number of rows is 906 divisible by the blocksize 907 */ 908 mbs = M/bs; 909 extra_rows = bs - M + bs*(mbs); 910 if (extra_rows == bs) extra_rows = 0; 911 else mbs++; 912 if (extra_rows) { 913 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 914 } 915 916 /* read in row lengths */ 917 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 918 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 919 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 920 921 /* read in column indices */ 922 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 923 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 924 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 925 926 /* loop over row lengths determining block row lengths */ 927 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 928 PetscMemzero(browlengths,mbs*sizeof(int)); 929 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 930 PetscMemzero(mask,mbs*sizeof(int)); 931 masked = mask + mbs; 932 rowcount = 0; nzcount = 0; 933 for ( i=0; i<mbs; i++ ) { 934 nmask = 0; 935 for ( j=0; j<bs; j++ ) { 936 kmax = rowlengths[rowcount]; 937 for ( k=0; k<kmax; k++ ) { 938 tmp = jj[nzcount++]/bs; 939 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 940 } 941 rowcount++; 942 } 943 browlengths[i] += nmask; 944 /* zero out the mask elements we set */ 945 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 946 } 947 948 /* create our matrix */ 949 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 950 CHKERRQ(ierr); 951 B = *A; 952 a = (Mat_SeqBAIJ *) B->data; 953 954 /* set matrix "i" values */ 955 a->i[0] = 0; 956 for ( i=1; i<= mbs; i++ ) { 957 a->i[i] = a->i[i-1] + browlengths[i-1]; 958 a->ilen[i-1] = browlengths[i-1]; 959 } 960 a->indexshift = 0; 961 a->nz = 0; 962 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 963 964 /* read in nonzero values */ 965 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 966 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 967 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 968 969 /* set "a" and "j" values into matrix */ 970 nzcount = 0; jcount = 0; 971 for ( i=0; i<mbs; i++ ) { 972 nzcountb = nzcount; 973 nmask = 0; 974 for ( j=0; j<bs; j++ ) { 975 kmax = rowlengths[i*bs+j]; 976 for ( k=0; k<kmax; k++ ) { 977 tmp = jj[nzcount++]/bs; 978 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 979 } 980 rowcount++; 981 } 982 /* sort the masked values */ 983 PetscSortInt(nmask,masked); 984 985 /* set "j" values into matrix */ 986 maskcount = 1; 987 for ( j=0; j<nmask; j++ ) { 988 a->j[jcount++] = masked[j]; 989 mask[masked[j]] = maskcount++; 990 } 991 /* set "a" values into matrix */ 992 ishift = bs2*a->i[i]; 993 for ( j=0; j<bs; j++ ) { 994 kmax = rowlengths[i*bs+j]; 995 for ( k=0; k<kmax; k++ ) { 996 tmp = jj[nzcountb]/bs ; 997 block = mask[tmp] - 1; 998 point = jj[nzcountb] - bs*tmp; 999 idx = ishift + bs2*block + j + bs*point; 1000 a->a[idx] = aa[nzcountb++]; 1001 } 1002 } 1003 /* zero out the mask elements we set */ 1004 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1005 } 1006 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1007 1008 PetscFree(rowlengths); 1009 PetscFree(browlengths); 1010 PetscFree(aa); 1011 PetscFree(jj); 1012 PetscFree(mask); 1013 1014 B->assembled = PETSC_TRUE; 1015 1016 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1017 if (flg) { 1018 Viewer tviewer; 1019 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1020 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1021 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1022 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1023 } 1024 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1025 if (flg) { 1026 Viewer tviewer; 1027 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1028 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1029 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1030 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1031 } 1032 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1033 if (flg) { 1034 Viewer tviewer; 1035 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1036 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1037 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1038 } 1039 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1040 if (flg) { 1041 Viewer tviewer; 1042 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1043 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1044 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1045 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1046 } 1047 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1048 if (flg) { 1049 Viewer tviewer; 1050 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1051 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1052 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1053 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1054 } 1055 return 0; 1056 } 1057 1058 1059 1060