1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.25 1996/04/04 00:27:51 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs*bs; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs*bs*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs*bs*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs*bs*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs*bs*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs*bs*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs*bs*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs*bs*k + l*bs + j]),imag(a->a[bs*bs*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs*bs*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs*bs*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 #define CHUNKSIZE 2 225 226 /* This version has row oriented v */ 227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 231 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 232 int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 233 int ridx,cidx; 234 Scalar *ap,value,*aa=a->a,*bap; 235 236 for ( k=0; k<m; k++ ) { /* loop over added rows */ 237 row = im[k]; brow = row/bs; 238 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 239 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 240 rp = aj + ai[brow] + shift; ap = aa + bs*bs*ai[brow] + shift; 241 rmax = imax[brow]; nrow = ailen[brow]; 242 low = 0; 243 for ( l=0; l<n; l++ ) { /* loop over added columns */ 244 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 245 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 246 col = in[l] - shift; bcol = col/bs; 247 ridx = row % bs; cidx = col % bs; 248 if (roworiented) { 249 value = *v++; 250 } 251 else { 252 value = v[k + l*m]; 253 } 254 if (!sorted) low = 0; high = nrow; 255 while (high-low > 5) { 256 t = (low+high)/2; 257 if (rp[t] > bcol) high = t; 258 else low = t; 259 } 260 for ( i=low; i<high; i++ ) { 261 if (rp[i] > bcol) break; 262 if (rp[i] == bcol) { 263 bap = ap + bs*bs*i + bs*ridx + cidx; 264 if (is == ADD_VALUES) *bap += value; 265 else *bap = value; 266 goto noinsert; 267 } 268 } 269 if (nonew) goto noinsert; 270 if (nrow >= rmax) { 271 /* there is no extra room in row, therefore enlarge */ 272 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 273 Scalar *new_a; 274 275 /* malloc new storage space */ 276 len = new_nz*(sizeof(int)+bs*bs*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 277 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 278 new_j = (int *) (new_a + bs*bs*new_nz); 279 new_i = new_j + new_nz; 280 281 /* copy over old data into new slots */ 282 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 283 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 284 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); 285 len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 286 PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 287 len*sizeof(int)); 288 PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs*bs*sizeof(Scalar)); 289 PetscMemzero(new_a+bs*bs*(ai[brow]+shift+nrow),bs*bs*CHUNKSIZE); 290 PetscMemcpy(new_a+bs*bs*(ai[brow]+shift+nrow+CHUNKSIZE), 291 aa+bs*bs*(ai[brow]+shift+nrow),bs*bs*len*sizeof(Scalar)); 292 /* free up old matrix storage */ 293 PetscFree(a->a); 294 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 295 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 296 a->singlemalloc = 1; 297 298 rp = aj + ai[brow] + shift; ap = aa + ai[brow] + shift; 299 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 300 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs*bs*sizeof(Scalar))); 301 a->maxnz += bs*bs*CHUNKSIZE; 302 a->reallocs++; 303 } 304 N = nrow++ - 1; a->nz++; 305 /* shift up all the later entries in this row */ 306 for ( ii=N; ii>=i; ii-- ) { 307 rp[ii+1] = rp[ii]; 308 PetscMemcpy(ap+bs*bs*(ii+1),ap+bs*bs*(ii),bs*bs); 309 } 310 rp[i] = bcol; 311 ap[bs*bs*i + bs*ridx + cidx] = value; 312 noinsert:; 313 low = i; 314 } 315 ailen[brow] = nrow; 316 } 317 return 0; 318 } 319 320 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 323 *m = a->m; *n = a->n; 324 return 0; 325 } 326 327 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 328 { 329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 330 *m = 0; *n = a->m; 331 return 0; 332 } 333 334 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 335 { 336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 337 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i; 338 Scalar *aa,*v_i,*aa_i; 339 340 bs = a->bs; 341 ai = a->i; 342 aj = a->j; 343 aa = a->a; 344 345 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 346 347 bn = row/bs; /* Block number */ 348 bp = row % bs; /* Block Position */ 349 M = ai[bn+1] - ai[bn]; 350 *nz = bs*M; 351 352 if (v) { 353 *v = 0; 354 if (*nz) { 355 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 356 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 357 v_i = *v + i*bs; 358 aa_i = aa + bs*bs*(ai[bn] + i); 359 for ( j=bp,k=0; j<bs*bs; j+=bs,k++ ) {v_i[k] = aa_i[j];} 360 } 361 } 362 } 363 364 if (idx) { 365 *idx = 0; 366 if (*nz) { 367 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 368 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 369 idx_i = *idx + i*bs; 370 itmp = bs*aj[ai[bn] + i]; 371 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 372 } 373 } 374 } 375 return 0; 376 } 377 378 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 379 { 380 if (idx) {if (*idx) PetscFree(*idx);} 381 if (v) {if (*v) PetscFree(*v);} 382 return 0; 383 } 384 385 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 386 { 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 388 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 389 int m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift; 390 int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 391 Scalar *aa = a->a, *ap; 392 393 if (mode == FLUSH_ASSEMBLY) return 0; 394 395 for ( i=1; i<mbs; i++ ) { 396 /* move each row back by the amount of empty slots (fshift) before it*/ 397 fshift += imax[i-1] - ailen[i-1]; 398 if (fshift) { 399 ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 400 N = ailen[i]; 401 for ( j=0; j<N; j++ ) { 402 ip[j-fshift] = ip[j]; 403 PetscMemcpy(ap+(j-fshift)*bs2,ap+j+bs2,bs2); 404 } 405 } 406 ai[i] = ai[i-1] + ailen[i-1]; 407 } 408 if (mbs) { 409 fshift += imax[mbs-1] - ailen[mbs-1]; 410 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 411 } 412 /* reset ilen and imax for each row */ 413 for ( i=0; i<mbs; i++ ) { 414 ailen[i] = imax[i] = ai[i+1] - ai[i]; 415 } 416 a->nz = (ai[m] + shift)*bs2; 417 418 /* diagonals may have moved, so kill the diagonal pointers */ 419 if (fshift && a->diag) { 420 PetscFree(a->diag); 421 PLogObjectMemory(A,-(m+1)*sizeof(int)); 422 a->diag = 0; 423 } 424 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 425 fshift,a->nz,m); 426 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 427 a->reallocs); 428 return 0; 429 } 430 431 static int MatZeroEntries_SeqBAIJ(Mat A) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 434 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 435 return 0; 436 } 437 438 int MatDestroy_SeqBAIJ(PetscObject obj) 439 { 440 Mat A = (Mat) obj; 441 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 442 443 #if defined(PETSC_LOG) 444 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 445 #endif 446 PetscFree(a->a); 447 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 448 if (a->diag) PetscFree(a->diag); 449 if (a->ilen) PetscFree(a->ilen); 450 if (a->imax) PetscFree(a->imax); 451 if (a->solve_work) PetscFree(a->solve_work); 452 if (a->mult_work) PetscFree(a->mult_work); 453 PetscFree(a); 454 PLogObjectDestroy(A); 455 PetscHeaderDestroy(A); 456 return 0; 457 } 458 459 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 460 { 461 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 462 if (op == ROW_ORIENTED) a->roworiented = 1; 463 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 464 else if (op == COLUMNS_SORTED) a->sorted = 1; 465 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 466 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 467 else if (op == ROWS_SORTED || 468 op == SYMMETRIC_MATRIX || 469 op == STRUCTURALLY_SYMMETRIC_MATRIX || 470 op == YES_NEW_DIAGONALS) 471 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 472 else if (op == NO_NEW_DIAGONALS) 473 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 474 else 475 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 476 return 0; 477 } 478 479 480 /* -------------------------------------------------------*/ 481 /* Should check that shapes of vectors and matrices match */ 482 /* -------------------------------------------------------*/ 483 #include "pinclude/plapack.h" 484 485 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 488 Scalar *xg,*yg; 489 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 490 register Scalar x1,x2,x3,x4,x5; 491 int mbs = a->mbs, m = a->m, i, *idx,*ii; 492 int bs = a->bs,j,n,bs2 = bs*bs; 493 494 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 495 PetscMemzero(y,m*sizeof(Scalar)); 496 x = x; 497 idx = a->j; 498 v = a->a; 499 ii = a->i; 500 501 switch (bs) { 502 case 1: 503 for ( i=0; i<m; i++ ) { 504 n = ii[1] - ii[0]; ii++; 505 sum = 0.0; 506 while (n--) sum += *v++ * x[*idx++]; 507 y[i] = sum; 508 } 509 break; 510 case 2: 511 for ( i=0; i<mbs; i++ ) { 512 n = ii[1] - ii[0]; ii++; 513 sum1 = 0.0; sum2 = 0.0; 514 for ( j=0; j<n; j++ ) { 515 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 516 sum1 += v[0]*x1 + v[2]*x2; 517 sum2 += v[1]*x1 + v[3]*x2; 518 v += 4; 519 } 520 y[0] += sum1; y[1] += sum2; 521 y += 2; 522 } 523 break; 524 case 3: 525 for ( i=0; i<mbs; i++ ) { 526 n = ii[1] - ii[0]; ii++; 527 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 528 for ( j=0; j<n; j++ ) { 529 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 530 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 531 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 532 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 533 v += 9; 534 } 535 y[0] += sum1; y[1] += sum2; y[2] += sum3; 536 y += 3; 537 } 538 break; 539 case 4: 540 for ( i=0; i<mbs; i++ ) { 541 n = ii[1] - ii[0]; ii++; 542 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 543 for ( j=0; j<n; j++ ) { 544 xb = x + 4*(*idx++); 545 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 546 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 547 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 548 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 549 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 550 v += 16; 551 } 552 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 553 y += 4; 554 } 555 break; 556 case 5: 557 for ( i=0; i<mbs; i++ ) { 558 n = ii[1] - ii[0]; ii++; 559 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 560 for ( j=0; j<n; j++ ) { 561 xb = x + 5*(*idx++); 562 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 563 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 564 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 565 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 566 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 567 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 568 v += 25; 569 } 570 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 571 y += 5; 572 } 573 break; 574 /* block sizes larger then 5 by 5 are handled by BLAS */ 575 default: { 576 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 577 if (!a->mult_work) { 578 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 579 CHKPTRQ(a->mult_work); 580 } 581 work = a->mult_work; 582 for ( i=0; i<mbs; i++ ) { 583 n = ii[1] - ii[0]; ii++; 584 ncols = n*bs; 585 workt = work; 586 for ( j=0; j<n; j++ ) { 587 xb = x + bs*(*idx++); 588 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 589 workt += bs; 590 } 591 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 592 v += n*bs2; 593 y += bs; 594 } 595 } 596 } 597 PLogFlops(2*bs2*a->nz - m); 598 return 0; 599 } 600 601 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 602 { 603 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 604 if (nz) *nz = a->bs*a->bs*a->nz; 605 if (nza) *nza = a->maxnz; 606 if (mem) *mem = (int)A->mem; 607 return 0; 608 } 609 610 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 611 { 612 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 613 614 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 615 616 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 617 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 618 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 619 *flg = PETSC_FALSE; return 0; 620 } 621 622 /* if the a->i are the same */ 623 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 624 *flg = PETSC_FALSE; return 0; 625 } 626 627 /* if a->j are the same */ 628 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 629 *flg = PETSC_FALSE; return 0; 630 } 631 632 /* if a->a are the same */ 633 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 634 *flg = PETSC_FALSE; return 0; 635 } 636 *flg = PETSC_TRUE; 637 return 0; 638 639 } 640 641 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 642 { 643 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 644 int i,j,k,n,row,bs,*ai,*aj,ambs; 645 Scalar *x, zero = 0.0,*aa,*aa_j; 646 647 bs = a->bs; 648 aa = a->a; 649 ai = a->i; 650 aj = a->j; 651 ambs = a->mbs; 652 653 VecSet(&zero,v); 654 VecGetArray(v,&x); VecGetLocalSize(v,&n); 655 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 656 for ( i=0; i<ambs; i++ ) { 657 for ( j=ai[i]; j<ai[i+1]; j++ ) { 658 if (aj[j] == i) { 659 row = i*bs; 660 aa_j = aa+j*bs*bs; 661 for (k=0; k<bs*bs; k+=(bs+1),row++) x[row] = aa_j[k]; 662 break; 663 } 664 } 665 } 666 return 0; 667 } 668 669 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 670 { 671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 672 Scalar *l,*r,x,*v,*aa,*li,*ri; 673 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs; 674 675 ai = a->i; 676 aj = a->j; 677 aa = a->a; 678 m = a->m; 679 n = a->n; 680 bs = a->bs; 681 mbs = a->mbs; 682 683 if (ll) { 684 VecGetArray(ll,&l); VecGetSize(ll,&lm); 685 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 686 for ( i=0; i<mbs; i++ ) { /* for each block row */ 687 M = ai[i+1] - ai[i]; 688 li = l + i*bs; 689 v = aa + bs*bs*ai[i]; 690 for ( j=0; j<M; j++ ) { /* for each block */ 691 for ( k=0; k<bs*bs; k++ ) { 692 (*v++) *= li[k%bs]; 693 } 694 } 695 } 696 } 697 698 if (rr) { 699 VecGetArray(rr,&r); VecGetSize(rr,&rn); 700 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 701 for ( i=0; i<mbs; i++ ) { /* for each block row */ 702 M = ai[i+1] - ai[i]; 703 v = aa + bs*bs*ai[i]; 704 for ( j=0; j<M; j++ ) { /* for each block */ 705 ri = r + bs*aj[ai[i]+j]; 706 for ( k=0; k<bs; k++ ) { 707 x = ri[k]; 708 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 709 } 710 } 711 } 712 } 713 return 0; 714 } 715 716 717 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 718 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 719 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 720 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 721 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 722 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 723 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 724 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 725 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 726 727 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 728 { 729 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 730 Scalar *v = a->a; 731 double sum = 0.0; 732 int i; 733 734 if (type == NORM_FROBENIUS) { 735 for (i=0; i<a->nz; i++ ) { 736 #if defined(PETSC_COMPLEX) 737 sum += real(conj(*v)*(*v)); v++; 738 #else 739 sum += (*v)*(*v); v++; 740 #endif 741 } 742 *norm = sqrt(sum); 743 } 744 else { 745 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 746 } 747 return 0; 748 } 749 750 /* 751 note: This can only work for identity for row and col. It would 752 be good to check this and otherwise generate an error. 753 */ 754 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 755 { 756 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 757 Mat outA; 758 int ierr; 759 760 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 761 762 outA = inA; 763 inA->factor = FACTOR_LU; 764 a->row = row; 765 a->col = col; 766 767 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 768 769 if (!a->diag) { 770 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 771 } 772 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 773 return 0; 774 } 775 776 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 777 { 778 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 779 int one = 1, totalnz = a->bs*a->bs*a->nz; 780 BLscal_( &totalnz, alpha, a->a, &one ); 781 PLogFlops(totalnz); 782 return 0; 783 } 784 785 int MatPrintHelp_SeqBAIJ(Mat A) 786 { 787 static int called = 0; 788 789 if (called) return 0; else called = 1; 790 return 0; 791 } 792 /* -------------------------------------------------------------------*/ 793 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 794 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 795 MatMult_SeqBAIJ,0, 796 0,0, 797 MatSolve_SeqBAIJ,0, 798 0,0, 799 MatLUFactor_SeqBAIJ,0, 800 0, 801 0, 802 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 803 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 804 0,MatAssemblyEnd_SeqBAIJ, 805 0, 806 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 807 MatGetReordering_SeqBAIJ, 808 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 809 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 810 MatILUFactorSymbolic_SeqBAIJ,0, 811 0,0,/* MatConvert_SeqBAIJ */ 0, 812 0,0, 813 MatConvertSameType_SeqBAIJ,0,0, 814 MatILUFactor_SeqBAIJ,0,0, 815 0,0, 816 0,0, 817 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 818 0}; 819 820 /*@C 821 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 822 (the default parallel PETSc format). For good matrix assembly performance 823 the user should preallocate the matrix storage by setting the parameter nz 824 (or nzz). By setting these parameters accurately, performance can be 825 increased by more than a factor of 50. 826 827 Input Parameters: 828 . comm - MPI communicator, set to MPI_COMM_SELF 829 . bs - size of block 830 . m - number of rows 831 . n - number of columns 832 . nz - number of block nonzeros per block row (same for all rows) 833 . nzz - number of block nonzeros per block row or PETSC_NULL 834 (possibly different for each row) 835 836 Output Parameter: 837 . A - the matrix 838 839 Notes: 840 The BAIJ format, is fully compatible with standard Fortran 77 841 storage. That is, the stored row and column indices can begin at 842 either one (as in Fortran) or zero. See the users manual for details. 843 844 Specify the preallocated storage with either nz or nnz (not both). 845 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 846 allocation. For additional details, see the users manual chapter on 847 matrices and the file $(PETSC_DIR)/Performance. 848 849 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 850 @*/ 851 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 852 { 853 Mat B; 854 Mat_SeqBAIJ *b; 855 int i,len,ierr, flg,mbs = m/bs; 856 857 if (mbs*bs != m) 858 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 859 860 *A = 0; 861 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 862 PLogObjectCreate(B); 863 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 864 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 865 switch (bs) { 866 case 1: 867 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 868 break; 869 case 2: 870 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 871 break; 872 case 3: 873 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 874 break; 875 case 4: 876 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 877 break; 878 case 5: 879 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 880 break; 881 } 882 B->destroy = MatDestroy_SeqBAIJ; 883 B->view = MatView_SeqBAIJ; 884 B->factor = 0; 885 B->lupivotthreshold = 1.0; 886 b->row = 0; 887 b->col = 0; 888 b->reallocs = 0; 889 890 b->m = m; 891 b->mbs = mbs; 892 b->n = n; 893 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 894 if (nnz == PETSC_NULL) { 895 if (nz == PETSC_DEFAULT) nz = 5; 896 else if (nz <= 0) nz = 1; 897 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 898 nz = nz*mbs; 899 } 900 else { 901 nz = 0; 902 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 903 } 904 905 /* allocate the matrix space */ 906 len = nz*sizeof(int) + nz*bs*bs*sizeof(Scalar) + (b->m+1)*sizeof(int); 907 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 908 PetscMemzero(b->a,nz*bs*bs*sizeof(Scalar)); 909 b->j = (int *) (b->a + nz*bs*bs); 910 PetscMemzero(b->j,nz*sizeof(int)); 911 b->i = b->j + nz; 912 b->singlemalloc = 1; 913 914 b->i[0] = 0; 915 for (i=1; i<mbs+1; i++) { 916 b->i[i] = b->i[i-1] + b->imax[i-1]; 917 } 918 919 /* b->ilen will count nonzeros in each block row so far. */ 920 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 921 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 922 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 923 924 b->bs = bs; 925 b->mbs = mbs; 926 b->nz = 0; 927 b->maxnz = nz; 928 b->sorted = 0; 929 b->roworiented = 1; 930 b->nonew = 0; 931 b->diag = 0; 932 b->solve_work = 0; 933 b->mult_work = 0; 934 b->spptr = 0; 935 936 *A = B; 937 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 938 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 939 return 0; 940 } 941 942 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 943 { 944 Mat C; 945 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 946 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz; 947 948 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 949 950 *B = 0; 951 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 952 PLogObjectCreate(C); 953 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 954 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 955 C->destroy = MatDestroy_SeqBAIJ; 956 C->view = MatView_SeqBAIJ; 957 C->factor = A->factor; 958 c->row = 0; 959 c->col = 0; 960 C->assembled = PETSC_TRUE; 961 962 c->m = a->m; 963 c->n = a->n; 964 c->bs = a->bs; 965 c->mbs = a->mbs; 966 c->nbs = a->nbs; 967 968 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 969 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 970 for ( i=0; i<mbs; i++ ) { 971 c->imax[i] = a->imax[i]; 972 c->ilen[i] = a->ilen[i]; 973 } 974 975 /* allocate the matrix space */ 976 c->singlemalloc = 1; 977 len = (mbs+1)*sizeof(int) + nz*(bs*bs*sizeof(Scalar) + sizeof(int)); 978 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 979 c->j = (int *) (c->a + nz*bs*bs); 980 c->i = c->j + nz; 981 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 982 if (mbs > 0) { 983 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 984 if (cpvalues == COPY_VALUES) { 985 PetscMemcpy(c->a,a->a,bs*bs*nz*sizeof(Scalar)); 986 } 987 } 988 989 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 990 c->sorted = a->sorted; 991 c->roworiented = a->roworiented; 992 c->nonew = a->nonew; 993 994 if (a->diag) { 995 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 996 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 997 for ( i=0; i<mbs; i++ ) { 998 c->diag[i] = a->diag[i]; 999 } 1000 } 1001 else c->diag = 0; 1002 c->nz = a->nz; 1003 c->maxnz = a->maxnz; 1004 c->solve_work = 0; 1005 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1006 c->mult_work = 0; 1007 *B = C; 1008 return 0; 1009 } 1010 1011 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1012 { 1013 Mat_SeqBAIJ *a; 1014 Mat B; 1015 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1016 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1017 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1018 int *masked, nmask,tmp,ishift,bs2; 1019 Scalar *aa; 1020 MPI_Comm comm = ((PetscObject) viewer)->comm; 1021 1022 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1023 bs2 = bs*bs; 1024 1025 MPI_Comm_size(comm,&size); 1026 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1027 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1028 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1029 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1030 M = header[1]; N = header[2]; nz = header[3]; 1031 1032 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1033 1034 /* 1035 This code adds extra rows to make sure the number of rows is 1036 divisible by the blocksize 1037 */ 1038 mbs = M/bs; 1039 extra_rows = bs - M + bs*(mbs); 1040 if (extra_rows == bs) extra_rows = 0; 1041 else mbs++; 1042 if (extra_rows) { 1043 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1044 } 1045 1046 /* read in row lengths */ 1047 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1048 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1049 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1050 1051 /* read in column indices */ 1052 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1053 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1054 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1055 1056 /* loop over row lengths determining block row lengths */ 1057 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1058 PetscMemzero(browlengths,mbs*sizeof(int)); 1059 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1060 PetscMemzero(mask,mbs*sizeof(int)); 1061 masked = mask + mbs; 1062 rowcount = 0; nzcount = 0; 1063 for ( i=0; i<mbs; i++ ) { 1064 nmask = 0; 1065 for ( j=0; j<bs; j++ ) { 1066 kmax = rowlengths[rowcount]; 1067 for ( k=0; k<kmax; k++ ) { 1068 tmp = jj[nzcount++]/bs; 1069 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1070 } 1071 rowcount++; 1072 } 1073 browlengths[i] += nmask; 1074 /* zero out the mask elements we set */ 1075 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1076 } 1077 1078 /* create our matrix */ 1079 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1080 CHKERRQ(ierr); 1081 B = *A; 1082 a = (Mat_SeqBAIJ *) B->data; 1083 1084 /* set matrix "i" values */ 1085 a->i[0] = 0; 1086 for ( i=1; i<= mbs; i++ ) { 1087 a->i[i] = a->i[i-1] + browlengths[i-1]; 1088 a->ilen[i-1] = browlengths[i-1]; 1089 } 1090 a->indexshift = 0; 1091 a->nz = 0; 1092 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1093 1094 /* read in nonzero values */ 1095 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1096 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1097 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1098 1099 /* set "a" and "j" values into matrix */ 1100 nzcount = 0; jcount = 0; 1101 for ( i=0; i<mbs; i++ ) { 1102 nzcountb = nzcount; 1103 nmask = 0; 1104 for ( j=0; j<bs; j++ ) { 1105 kmax = rowlengths[i*bs+j]; 1106 for ( k=0; k<kmax; k++ ) { 1107 tmp = jj[nzcount++]/bs; 1108 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1109 } 1110 rowcount++; 1111 } 1112 /* sort the masked values */ 1113 PetscSortInt(nmask,masked); 1114 1115 /* set "j" values into matrix */ 1116 maskcount = 1; 1117 for ( j=0; j<nmask; j++ ) { 1118 a->j[jcount++] = masked[j]; 1119 mask[masked[j]] = maskcount++; 1120 } 1121 /* set "a" values into matrix */ 1122 ishift = bs2*a->i[i]; 1123 for ( j=0; j<bs; j++ ) { 1124 kmax = rowlengths[i*bs+j]; 1125 for ( k=0; k<kmax; k++ ) { 1126 tmp = jj[nzcountb]/bs ; 1127 block = mask[tmp] - 1; 1128 point = jj[nzcountb] - bs*tmp; 1129 idx = ishift + bs2*block + j + bs*point; 1130 a->a[idx] = aa[nzcountb++]; 1131 } 1132 } 1133 /* zero out the mask elements we set */ 1134 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1135 } 1136 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1137 1138 PetscFree(rowlengths); 1139 PetscFree(browlengths); 1140 PetscFree(aa); 1141 PetscFree(jj); 1142 PetscFree(mask); 1143 1144 B->assembled = PETSC_TRUE; 1145 1146 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1147 if (flg) { 1148 Viewer tviewer; 1149 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1150 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1151 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1152 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1153 } 1154 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1155 if (flg) { 1156 Viewer tviewer; 1157 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1158 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1159 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1160 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1161 } 1162 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1163 if (flg) { 1164 Viewer tviewer; 1165 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1166 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1167 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1168 } 1169 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1170 if (flg) { 1171 Viewer tviewer; 1172 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1173 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1174 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1175 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1176 } 1177 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1178 if (flg) { 1179 Viewer tviewer; 1180 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1181 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1182 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1183 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1184 } 1185 return 0; 1186 } 1187 1188 1189 1190