1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.21 1996/03/27 18:59:56 balay Exp curfman $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs*bs; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs*bs*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 225 { 226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 227 *m = a->m; *n = a->n; 228 return 0; 229 } 230 231 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 232 { 233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 234 *m = 0; *n = a->m; 235 return 0; 236 } 237 238 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 239 { 240 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 241 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i; 242 Scalar *aa,*v_i,*aa_i; 243 244 bs = a->bs; 245 ai = a->i; 246 aj = a->j; 247 aa = a->a; 248 249 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 250 251 bn = row/bs; /* Block number */ 252 bp = row % bs; /* Block Position */ 253 M = ai[bn+1] - ai[bn]; 254 *nz = bs*M; 255 256 if (v) { 257 *v = 0; 258 if (*nz) { 259 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 260 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 261 v_i = *v + i*bs; 262 aa_i = aa + bs*bs*(ai[bn] + i); 263 for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];} 264 } 265 } 266 } 267 268 if (idx) { 269 *idx = 0; 270 if (*nz) { 271 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 272 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 273 idx_i = *idx + i*bs; 274 itmp = bs*aj[ai[bn] + i]; 275 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 276 } 277 } 278 } 279 return 0; 280 } 281 282 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 283 { 284 if (idx) {if (*idx) PetscFree(*idx);} 285 if (v) {if (*v) PetscFree(*v);} 286 return 0; 287 } 288 289 static int MatZeroEntries_SeqBAIJ(Mat A) 290 { 291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 292 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 293 return 0; 294 } 295 296 int MatDestroy_SeqBAIJ(PetscObject obj) 297 { 298 Mat A = (Mat) obj; 299 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 300 301 #if defined(PETSC_LOG) 302 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 303 #endif 304 PetscFree(a->a); 305 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 306 if (a->diag) PetscFree(a->diag); 307 if (a->ilen) PetscFree(a->ilen); 308 if (a->imax) PetscFree(a->imax); 309 if (a->solve_work) PetscFree(a->solve_work); 310 if (a->mult_work) PetscFree(a->mult_work); 311 PetscFree(a); 312 return 0; 313 } 314 315 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 316 { 317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 318 if (op == ROW_ORIENTED) a->roworiented = 1; 319 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 320 else if (op == COLUMNS_SORTED) a->sorted = 1; 321 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 322 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 323 else if (op == ROWS_SORTED || 324 op == SYMMETRIC_MATRIX || 325 op == STRUCTURALLY_SYMMETRIC_MATRIX || 326 op == YES_NEW_DIAGONALS) 327 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 328 else if (op == NO_NEW_DIAGONALS) 329 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 330 else 331 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 332 return 0; 333 } 334 335 336 /* -------------------------------------------------------*/ 337 /* Should check that shapes of vectors and matrices match */ 338 /* -------------------------------------------------------*/ 339 #include "pinclude/plapack.h" 340 341 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 342 { 343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 344 Scalar *xg,*yg; 345 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 346 register Scalar x1,x2,x3,x4,x5; 347 int mbs = a->mbs, m = a->m, i, *idx,*ii; 348 int bs = a->bs,j,n,bs2 = bs*bs; 349 350 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 351 PetscMemzero(y,m*sizeof(Scalar)); 352 x = x; 353 idx = a->j; 354 v = a->a; 355 ii = a->i; 356 357 switch (bs) { 358 case 1: 359 for ( i=0; i<m; i++ ) { 360 n = ii[1] - ii[0]; ii++; 361 sum = 0.0; 362 while (n--) sum += *v++ * x[*idx++]; 363 y[i] = sum; 364 } 365 break; 366 case 2: 367 for ( i=0; i<mbs; i++ ) { 368 n = ii[1] - ii[0]; ii++; 369 sum1 = 0.0; sum2 = 0.0; 370 for ( j=0; j<n; j++ ) { 371 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 372 sum1 += v[0]*x1 + v[2]*x2; 373 sum2 += v[1]*x1 + v[3]*x2; 374 v += 4; 375 } 376 y[0] += sum1; y[1] += sum2; 377 y += 2; 378 } 379 break; 380 case 3: 381 for ( i=0; i<mbs; i++ ) { 382 n = ii[1] - ii[0]; ii++; 383 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 384 for ( j=0; j<n; j++ ) { 385 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 386 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 387 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 388 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 y[0] += sum1; y[1] += sum2; y[2] += sum3; 392 y += 3; 393 } 394 break; 395 case 4: 396 for ( i=0; i<mbs; i++ ) { 397 n = ii[1] - ii[0]; ii++; 398 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 399 for ( j=0; j<n; j++ ) { 400 xb = x + 4*(*idx++); 401 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 402 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 403 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 404 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 405 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 406 v += 16; 407 } 408 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 409 y += 4; 410 } 411 break; 412 case 5: 413 for ( i=0; i<mbs; i++ ) { 414 n = ii[1] - ii[0]; ii++; 415 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 416 for ( j=0; j<n; j++ ) { 417 xb = x + 5*(*idx++); 418 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 419 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 420 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 421 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 422 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 423 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 424 v += 25; 425 } 426 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 427 y += 5; 428 } 429 break; 430 /* block sizes larger then 5 by 5 are handled by BLAS */ 431 default: { 432 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 433 if (!a->mult_work) { 434 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 435 CHKPTRQ(a->mult_work); 436 } 437 work = a->mult_work; 438 for ( i=0; i<mbs; i++ ) { 439 n = ii[1] - ii[0]; ii++; 440 ncols = n*bs; 441 workt = work; 442 for ( j=0; j<n; j++ ) { 443 xb = x + bs*(*idx++); 444 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 445 workt += bs; 446 } 447 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 448 v += n*bs2; 449 y += bs; 450 } 451 } 452 } 453 PLogFlops(2*bs2*a->nz - m); 454 return 0; 455 } 456 457 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 458 { 459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 460 if (nz) *nz = a->bs*a->bs*a->nz; 461 if (nza) *nza = a->maxnz; 462 if (mem) *mem = (int)A->mem; 463 return 0; 464 } 465 466 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 467 { 468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 469 470 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 471 472 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 473 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 474 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 475 *flg = PETSC_FALSE; return 0; 476 } 477 478 /* if the a->i are the same */ 479 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 480 *flg = PETSC_FALSE; return 0; 481 } 482 483 /* if a->j are the same */ 484 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 485 *flg = PETSC_FALSE; return 0; 486 } 487 488 /* if a->a are the same */ 489 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 490 *flg = PETSC_FALSE; return 0; 491 } 492 *flg = PETSC_TRUE; 493 return 0; 494 495 } 496 497 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 498 { 499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 500 int i,j,k,n,row,bs,*ai,*aj,ambs; 501 Scalar *x, zero = 0.0,*aa,*aa_j; 502 503 bs = a->bs; 504 aa = a->a; 505 ai = a->i; 506 aj = a->j; 507 ambs = a->mbs; 508 509 VecSet(&zero,v); 510 VecGetArray(v,&x); VecGetLocalSize(v,&n); 511 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 512 for ( i=0; i<ambs; i++ ) { 513 for ( j=ai[i]; j<ai[i+1]; j++ ) { 514 if (aj[j] == i) { 515 row = i*bs; 516 aa_j = aa+j*bs*bs; 517 for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k]; 518 break; 519 } 520 } 521 } 522 return 0; 523 } 524 525 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 526 { 527 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 528 Scalar *l,*r,x,*v,*aa,*li,*ri; 529 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs; 530 531 ai = a->i; 532 aj = a->j; 533 aa = a->a; 534 m = a->m; 535 n = a->n; 536 bs = a->bs; 537 mbs = a->mbs; 538 539 if (ll) { 540 VecGetArray(ll,&l); VecGetSize(ll,&lm); 541 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 542 for ( i=0; i<mbs; i++ ) { /* for each block row */ 543 M = ai[i+1] - ai[i]; 544 li = l + i*bs; 545 v = aa + bs*bs*ai[i]; 546 for ( j=0; j<M; j++ ) { /* for each block */ 547 for ( k=0; k<bs*bs; k++ ) { 548 (*v++) *= li[k%bs]; 549 } 550 } 551 } 552 } 553 554 if (rr) { 555 VecGetArray(rr,&r); VecGetSize(rr,&rn); 556 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 557 for ( i=0; i<mbs; i++ ) { /* for each block row */ 558 M = ai[i+1] - ai[i]; 559 v = aa + bs*bs*ai[i]; 560 for ( j=0; j<M; j++ ) { /* for each block */ 561 ri = r + bs*aj[ai[i]+j]; 562 for ( k=0; k<bs; k++ ) { 563 x = ri[k]; 564 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 565 } 566 } 567 } 568 } 569 return 0; 570 } 571 572 573 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 574 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 575 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 576 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 577 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 578 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 579 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 580 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 581 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 582 583 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 584 { 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 586 Scalar *v = a->a; 587 double sum = 0.0; 588 int i; 589 590 if (type == NORM_FROBENIUS) { 591 for (i=0; i<a->nz; i++ ) { 592 #if defined(PETSC_COMPLEX) 593 sum += real(conj(*v)*(*v)); v++; 594 #else 595 sum += (*v)*(*v); v++; 596 #endif 597 } 598 *norm = sqrt(sum); 599 } 600 else { 601 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 602 } 603 return 0; 604 } 605 606 /* 607 note: This can only work for identity for row and col. It would 608 be good to check this and otherwise generate an error. 609 */ 610 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 611 { 612 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 613 Mat outA; 614 int ierr; 615 616 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 617 618 outA = inA; 619 inA->factor = FACTOR_LU; 620 a->row = row; 621 a->col = col; 622 623 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 624 625 if (!a->diag) { 626 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 627 } 628 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 629 return 0; 630 } 631 632 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 633 { 634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 635 int one = 1, totalnz = a->bs*a->bs*a->nz; 636 BLscal_( &totalnz, alpha, a->a, &one ); 637 PLogFlops(totalnz); 638 return 0; 639 } 640 641 int MatPrintHelp_SeqBAIJ(Mat A) 642 { 643 static int called = 0; 644 645 if (called) return 0; else called = 1; 646 return 0; 647 } 648 /* -------------------------------------------------------------------*/ 649 static struct _MatOps MatOps = {0, 650 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 651 MatMult_SeqBAIJ,0, 652 0,0, 653 MatSolve_SeqBAIJ,0, 654 0,0, 655 MatLUFactor_SeqBAIJ,0, 656 0, 657 0, 658 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 659 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 660 0,0, 661 0, 662 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 663 MatGetReordering_SeqBAIJ, 664 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 665 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 666 MatILUFactorSymbolic_SeqBAIJ,0, 667 0,0,/* MatConvert_SeqBAIJ */ 0, 668 0,0, 669 MatConvertSameType_SeqBAIJ,0,0, 670 MatILUFactor_SeqBAIJ,0,0, 671 0,0, 672 0,0, 673 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 674 0}; 675 676 /*@C 677 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 678 (the default parallel PETSc format). For good matrix assembly performance 679 the user should preallocate the matrix storage by setting the parameter nz 680 (or nzz). By setting these parameters accurately, performance can be 681 increased by more than a factor of 50. 682 683 Input Parameters: 684 . comm - MPI communicator, set to MPI_COMM_SELF 685 . bs - size of block 686 . m - number of rows 687 . n - number of columns 688 . nz - number of block nonzeros per block row (same for all rows) 689 . nzz - number of block nonzeros per block row or PETSC_NULL 690 (possibly different for each row) 691 692 Output Parameter: 693 . A - the matrix 694 695 Notes: 696 The BAIJ format, is fully compatible with standard Fortran 77 697 storage. That is, the stored row and column indices can begin at 698 either one (as in Fortran) or zero. See the users manual for details. 699 700 Specify the preallocated storage with either nz or nnz (not both). 701 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 702 allocation. For additional details, see the users manual chapter on 703 matrices and the file $(PETSC_DIR)/Performance. 704 705 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 706 @*/ 707 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 708 { 709 Mat B; 710 Mat_SeqBAIJ *b; 711 int i,len,ierr, flg,mbs = m/bs; 712 713 if (mbs*bs != m) 714 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 715 716 *A = 0; 717 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 718 PLogObjectCreate(B); 719 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 720 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 721 switch (bs) { 722 case 1: 723 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 724 break; 725 case 2: 726 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 727 break; 728 case 3: 729 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 730 break; 731 case 4: 732 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 733 break; 734 case 5: 735 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 736 break; 737 } 738 B->destroy = MatDestroy_SeqBAIJ; 739 B->view = MatView_SeqBAIJ; 740 B->factor = 0; 741 B->lupivotthreshold = 1.0; 742 b->row = 0; 743 b->col = 0; 744 b->reallocs = 0; 745 746 b->m = m; 747 b->mbs = mbs; 748 b->n = n; 749 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 750 if (nnz == PETSC_NULL) { 751 if (nz == PETSC_DEFAULT) nz = 5; 752 else if (nz <= 0) nz = 1; 753 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 754 nz = nz*mbs; 755 } 756 else { 757 nz = 0; 758 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 759 } 760 761 /* allocate the matrix space */ 762 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 763 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 764 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 765 b->j = (int *) (b->a + nz*bs*bs); 766 PetscMemzero(b->j,nz*sizeof(int)); 767 b->i = b->j + nz; 768 b->singlemalloc = 1; 769 770 b->i[0] = 0; 771 for (i=1; i<mbs+1; i++) { 772 b->i[i] = b->i[i-1] + b->imax[i-1]; 773 } 774 775 /* b->ilen will count nonzeros in each block row so far. */ 776 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 777 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 778 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 779 780 b->bs = bs; 781 b->mbs = mbs; 782 b->nz = 0; 783 b->maxnz = nz; 784 b->sorted = 0; 785 b->roworiented = 1; 786 b->nonew = 0; 787 b->diag = 0; 788 b->solve_work = 0; 789 b->mult_work = 0; 790 b->spptr = 0; 791 792 *A = B; 793 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 794 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 795 return 0; 796 } 797 798 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 799 { 800 Mat C; 801 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 802 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 803 804 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 805 806 *B = 0; 807 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 808 PLogObjectCreate(C); 809 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 810 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 811 C->destroy = MatDestroy_SeqBAIJ; 812 C->view = MatView_SeqBAIJ; 813 C->factor = A->factor; 814 c->row = 0; 815 c->col = 0; 816 C->assembled = PETSC_TRUE; 817 818 c->m = a->m; 819 c->n = a->n; 820 c->bs = a->bs; 821 c->mbs = a->mbs; 822 c->nbs = a->nbs; 823 824 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 825 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 826 for ( i=0; i<mbs; i++ ) { 827 c->imax[i] = a->imax[i]; 828 c->ilen[i] = a->ilen[i]; 829 } 830 831 /* allocate the matrix space */ 832 c->singlemalloc = 1; 833 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 834 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 835 c->j = (int *) (c->a + nz*bs*bs); 836 c->i = c->j + nz; 837 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 838 if (mbs > 0) { 839 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 840 if (cpvalues == COPY_VALUES) { 841 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 842 } 843 } 844 845 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 846 c->sorted = a->sorted; 847 c->roworiented = a->roworiented; 848 c->nonew = a->nonew; 849 850 if (a->diag) { 851 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 852 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 853 for ( i=0; i<mbs; i++ ) { 854 c->diag[i] = a->diag[i]; 855 } 856 } 857 else c->diag = 0; 858 c->nz = a->nz; 859 c->maxnz = a->maxnz; 860 c->solve_work = 0; 861 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 862 c->mult_work = 0; 863 *B = C; 864 return 0; 865 } 866 867 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 868 { 869 Mat_SeqBAIJ *a; 870 Mat B; 871 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 872 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 873 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 874 int *masked, nmask,tmp,ishift,bs2; 875 Scalar *aa; 876 MPI_Comm comm = ((PetscObject) viewer)->comm; 877 878 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 879 bs2 = bs*bs; 880 881 MPI_Comm_size(comm,&size); 882 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 883 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 884 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 885 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 886 M = header[1]; N = header[2]; nz = header[3]; 887 888 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 889 890 /* 891 This code adds extra rows to make sure the number of rows is 892 divisible by the blocksize 893 */ 894 mbs = M/bs; 895 extra_rows = bs - M + bs*(mbs); 896 if (extra_rows == bs) extra_rows = 0; 897 else mbs++; 898 if (extra_rows) { 899 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 900 } 901 902 /* read in row lengths */ 903 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 904 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 905 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 906 907 /* read in column indices */ 908 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 909 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 910 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 911 912 /* loop over row lengths determining block row lengths */ 913 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 914 PetscMemzero(browlengths,mbs*sizeof(int)); 915 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 916 PetscMemzero(mask,mbs*sizeof(int)); 917 masked = mask + mbs; 918 rowcount = 0; nzcount = 0; 919 for ( i=0; i<mbs; i++ ) { 920 nmask = 0; 921 for ( j=0; j<bs; j++ ) { 922 kmax = rowlengths[rowcount]; 923 for ( k=0; k<kmax; k++ ) { 924 tmp = jj[nzcount++]/bs; 925 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 926 } 927 rowcount++; 928 } 929 browlengths[i] += nmask; 930 /* zero out the mask elements we set */ 931 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 932 } 933 934 /* create our matrix */ 935 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 936 CHKERRQ(ierr); 937 B = *A; 938 a = (Mat_SeqBAIJ *) B->data; 939 940 /* set matrix "i" values */ 941 a->i[0] = 0; 942 for ( i=1; i<= mbs; i++ ) { 943 a->i[i] = a->i[i-1] + browlengths[i-1]; 944 a->ilen[i-1] = browlengths[i-1]; 945 } 946 a->indexshift = 0; 947 a->nz = 0; 948 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 949 950 /* read in nonzero values */ 951 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 952 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 953 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 954 955 /* set "a" and "j" values into matrix */ 956 nzcount = 0; jcount = 0; 957 for ( i=0; i<mbs; i++ ) { 958 nzcountb = nzcount; 959 nmask = 0; 960 for ( j=0; j<bs; j++ ) { 961 kmax = rowlengths[i*bs+j]; 962 for ( k=0; k<kmax; k++ ) { 963 tmp = jj[nzcount++]/bs; 964 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 965 } 966 rowcount++; 967 } 968 /* sort the masked values */ 969 PetscSortInt(nmask,masked); 970 971 /* set "j" values into matrix */ 972 maskcount = 1; 973 for ( j=0; j<nmask; j++ ) { 974 a->j[jcount++] = masked[j]; 975 mask[masked[j]] = maskcount++; 976 } 977 /* set "a" values into matrix */ 978 ishift = bs2*a->i[i]; 979 for ( j=0; j<bs; j++ ) { 980 kmax = rowlengths[i*bs+j]; 981 for ( k=0; k<kmax; k++ ) { 982 tmp = jj[nzcountb]/bs ; 983 block = mask[tmp] - 1; 984 point = jj[nzcountb] - bs*tmp; 985 idx = ishift + bs2*block + j + bs*point; 986 a->a[idx] = aa[nzcountb++]; 987 } 988 } 989 /* zero out the mask elements we set */ 990 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 991 } 992 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 993 994 PetscFree(rowlengths); 995 PetscFree(browlengths); 996 PetscFree(aa); 997 PetscFree(jj); 998 PetscFree(mask); 999 1000 B->assembled = PETSC_TRUE; 1001 1002 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1003 if (flg) { 1004 Viewer tviewer; 1005 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1006 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1007 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1008 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1009 } 1010 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1011 if (flg) { 1012 Viewer tviewer; 1013 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1014 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1015 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1016 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1017 } 1018 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1019 if (flg) { 1020 Viewer tviewer; 1021 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1022 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1023 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1024 } 1025 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1026 if (flg) { 1027 Viewer tviewer; 1028 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1029 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1030 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1031 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1032 } 1033 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1034 if (flg) { 1035 Viewer tviewer; 1036 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1037 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1038 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1039 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1040 } 1041 return 0; 1042 } 1043 1044 1045 1046