1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.26 1996/04/04 04:07:02 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 #define CHUNKSIZE 1 225 226 /* This version has row oriented v */ 227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 231 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 232 int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 233 int ridx,cidx,bs2=bs*bs; 234 Scalar *ap,value,*aa=a->a,*bap; 235 236 for ( k=0; k<m; k++ ) { /* loop over added rows */ 237 row = im[k]; brow = row/bs; 238 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 239 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 240 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 241 rmax = imax[brow]; nrow = ailen[brow]; 242 low = 0; 243 for ( l=0; l<n; l++ ) { /* loop over added columns */ 244 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 245 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 246 col = in[l] - shift; bcol = col/bs; 247 ridx = row % bs; cidx = col % bs; 248 if (roworiented) { 249 value = *v++; 250 } 251 else { 252 value = v[k + l*m]; 253 } 254 if (!sorted) low = 0; high = nrow; 255 while (high-low > 5) { 256 t = (low+high)/2; 257 if (rp[t] > bcol) high = t; 258 else low = t; 259 } 260 for ( i=low; i<high; i++ ) { 261 if (rp[i] > bcol) break; 262 if (rp[i] == bcol) { 263 bap = ap + bs2*i + bs*cidx + ridx; 264 if (is == ADD_VALUES) *bap += value; 265 else *bap = value; 266 goto noinsert; 267 } 268 } 269 if (nonew) goto noinsert; 270 if (nrow >= rmax) { 271 /* there is no extra room in row, therefore enlarge */ 272 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 273 Scalar *new_a; 274 275 /* malloc new storage space */ 276 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 277 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 278 new_j = (int *) (new_a + bs2*new_nz); 279 new_i = new_j + new_nz; 280 281 /* copy over old data into new slots */ 282 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 283 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 284 PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 285 len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 286 PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 287 len*sizeof(int)); 288 PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 289 PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 290 PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 291 aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 292 /* free up old matrix storage */ 293 PetscFree(a->a); 294 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 295 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 296 a->singlemalloc = 1; 297 298 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 299 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 300 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 301 a->maxnz += bs2*CHUNKSIZE; 302 a->reallocs++; 303 a->nz+=bs2; 304 } 305 N = nrow++ - 1; 306 /* shift up all the later entries in this row */ 307 for ( ii=N; ii>=i; ii-- ) { 308 rp[ii+1] = rp[ii]; 309 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 310 } 311 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 312 rp[i] = bcol; 313 ap[bs2*i + bs*cidx + ridx] = value; 314 noinsert:; 315 low = i; 316 } 317 ailen[brow] = nrow; 318 } 319 return 0; 320 } 321 322 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 323 { 324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 325 *m = a->m; *n = a->n; 326 return 0; 327 } 328 329 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 332 *m = 0; *n = a->m; 333 return 0; 334 } 335 336 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 337 { 338 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 339 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 340 Scalar *aa,*v_i,*aa_i; 341 342 bs = a->bs; 343 ai = a->i; 344 aj = a->j; 345 aa = a->a; 346 bs2 = bs*bs; 347 348 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 349 350 bn = row/bs; /* Block number */ 351 bp = row % bs; /* Block Position */ 352 M = ai[bn+1] - ai[bn]; 353 *nz = bs*M; 354 355 if (v) { 356 *v = 0; 357 if (*nz) { 358 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 359 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 360 v_i = *v + i*bs; 361 aa_i = aa + bs2*(ai[bn] + i); 362 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 363 } 364 } 365 } 366 367 if (idx) { 368 *idx = 0; 369 if (*nz) { 370 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 371 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 372 idx_i = *idx + i*bs; 373 itmp = bs*aj[ai[bn] + i]; 374 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 375 } 376 } 377 } 378 return 0; 379 } 380 381 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 382 { 383 if (idx) {if (*idx) PetscFree(*idx);} 384 if (v) {if (*v) PetscFree(*v);} 385 return 0; 386 } 387 388 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 389 { 390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 391 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 392 int m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift; 393 int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 394 Scalar *aa = a->a, *ap; 395 396 if (mode == FLUSH_ASSEMBLY) return 0; 397 398 for ( i=1; i<mbs; i++ ) { 399 /* move each row back by the amount of empty slots (fshift) before it*/ 400 fshift += imax[i-1] - ailen[i-1]; 401 if (fshift) { 402 ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 403 N = ailen[i]; 404 for ( j=0; j<N; j++ ) { 405 ip[j-fshift] = ip[j]; 406 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 407 } 408 } 409 ai[i] = ai[i-1] + ailen[i-1]; 410 } 411 if (mbs) { 412 fshift += imax[mbs-1] - ailen[mbs-1]; 413 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 414 } 415 /* reset ilen and imax for each row */ 416 for ( i=0; i<mbs; i++ ) { 417 ailen[i] = imax[i] = ai[i+1] - ai[i]; 418 } 419 a->nz = (ai[m] + shift)*bs2; 420 421 /* diagonals may have moved, so kill the diagonal pointers */ 422 if (fshift && a->diag) { 423 PetscFree(a->diag); 424 PLogObjectMemory(A,-(m+1)*sizeof(int)); 425 a->diag = 0; 426 } 427 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 428 fshift,a->nz,m); 429 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 430 a->reallocs); 431 return 0; 432 } 433 434 static int MatZeroEntries_SeqBAIJ(Mat A) 435 { 436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 437 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 438 return 0; 439 } 440 441 int MatDestroy_SeqBAIJ(PetscObject obj) 442 { 443 Mat A = (Mat) obj; 444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 445 446 #if defined(PETSC_LOG) 447 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 448 #endif 449 PetscFree(a->a); 450 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 451 if (a->diag) PetscFree(a->diag); 452 if (a->ilen) PetscFree(a->ilen); 453 if (a->imax) PetscFree(a->imax); 454 if (a->solve_work) PetscFree(a->solve_work); 455 if (a->mult_work) PetscFree(a->mult_work); 456 PetscFree(a); 457 PLogObjectDestroy(A); 458 PetscHeaderDestroy(A); 459 return 0; 460 } 461 462 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 463 { 464 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 465 if (op == ROW_ORIENTED) a->roworiented = 1; 466 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 467 else if (op == COLUMNS_SORTED) a->sorted = 1; 468 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 469 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 470 else if (op == ROWS_SORTED || 471 op == SYMMETRIC_MATRIX || 472 op == STRUCTURALLY_SYMMETRIC_MATRIX || 473 op == YES_NEW_DIAGONALS) 474 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 475 else if (op == NO_NEW_DIAGONALS) 476 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 477 else 478 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 479 return 0; 480 } 481 482 483 /* -------------------------------------------------------*/ 484 /* Should check that shapes of vectors and matrices match */ 485 /* -------------------------------------------------------*/ 486 #include "pinclude/plapack.h" 487 488 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 489 { 490 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 491 Scalar *xg,*yg; 492 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 493 register Scalar x1,x2,x3,x4,x5; 494 int mbs = a->mbs, m = a->m, i, *idx,*ii; 495 int bs = a->bs,j,n,bs2 = bs*bs; 496 497 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 498 PetscMemzero(y,m*sizeof(Scalar)); 499 x = x; 500 idx = a->j; 501 v = a->a; 502 ii = a->i; 503 504 switch (bs) { 505 case 1: 506 for ( i=0; i<m; i++ ) { 507 n = ii[1] - ii[0]; ii++; 508 sum = 0.0; 509 while (n--) sum += *v++ * x[*idx++]; 510 y[i] = sum; 511 } 512 break; 513 case 2: 514 for ( i=0; i<mbs; i++ ) { 515 n = ii[1] - ii[0]; ii++; 516 sum1 = 0.0; sum2 = 0.0; 517 for ( j=0; j<n; j++ ) { 518 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 519 sum1 += v[0]*x1 + v[2]*x2; 520 sum2 += v[1]*x1 + v[3]*x2; 521 v += 4; 522 } 523 y[0] += sum1; y[1] += sum2; 524 y += 2; 525 } 526 break; 527 case 3: 528 for ( i=0; i<mbs; i++ ) { 529 n = ii[1] - ii[0]; ii++; 530 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 531 for ( j=0; j<n; j++ ) { 532 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 533 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 534 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 535 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 536 v += 9; 537 } 538 y[0] += sum1; y[1] += sum2; y[2] += sum3; 539 y += 3; 540 } 541 break; 542 case 4: 543 for ( i=0; i<mbs; i++ ) { 544 n = ii[1] - ii[0]; ii++; 545 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 546 for ( j=0; j<n; j++ ) { 547 xb = x + 4*(*idx++); 548 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 549 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 550 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 551 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 552 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 553 v += 16; 554 } 555 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 556 y += 4; 557 } 558 break; 559 case 5: 560 for ( i=0; i<mbs; i++ ) { 561 n = ii[1] - ii[0]; ii++; 562 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 563 for ( j=0; j<n; j++ ) { 564 xb = x + 5*(*idx++); 565 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 566 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 567 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 568 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 569 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 570 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 571 v += 25; 572 } 573 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 574 y += 5; 575 } 576 break; 577 /* block sizes larger then 5 by 5 are handled by BLAS */ 578 default: { 579 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 580 if (!a->mult_work) { 581 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 582 CHKPTRQ(a->mult_work); 583 } 584 work = a->mult_work; 585 for ( i=0; i<mbs; i++ ) { 586 n = ii[1] - ii[0]; ii++; 587 ncols = n*bs; 588 workt = work; 589 for ( j=0; j<n; j++ ) { 590 xb = x + bs*(*idx++); 591 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 592 workt += bs; 593 } 594 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 595 v += n*bs2; 596 y += bs; 597 } 598 } 599 } 600 PLogFlops(2*bs2*a->nz - m); 601 return 0; 602 } 603 604 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 605 { 606 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 607 if (nz) *nz = a->bs*a->bs*a->nz; 608 if (nza) *nza = a->maxnz; 609 if (mem) *mem = (int)A->mem; 610 return 0; 611 } 612 613 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 614 { 615 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 616 617 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 618 619 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 620 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 621 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 622 *flg = PETSC_FALSE; return 0; 623 } 624 625 /* if the a->i are the same */ 626 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 627 *flg = PETSC_FALSE; return 0; 628 } 629 630 /* if a->j are the same */ 631 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 632 *flg = PETSC_FALSE; return 0; 633 } 634 635 /* if a->a are the same */ 636 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 637 *flg = PETSC_FALSE; return 0; 638 } 639 *flg = PETSC_TRUE; 640 return 0; 641 642 } 643 644 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 645 { 646 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 647 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 648 Scalar *x, zero = 0.0,*aa,*aa_j; 649 650 bs = a->bs; 651 aa = a->a; 652 ai = a->i; 653 aj = a->j; 654 ambs = a->mbs; 655 bs2 = bs*bs; 656 657 VecSet(&zero,v); 658 VecGetArray(v,&x); VecGetLocalSize(v,&n); 659 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 660 for ( i=0; i<ambs; i++ ) { 661 for ( j=ai[i]; j<ai[i+1]; j++ ) { 662 if (aj[j] == i) { 663 row = i*bs; 664 aa_j = aa+j*bs2; 665 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 666 break; 667 } 668 } 669 } 670 return 0; 671 } 672 673 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 674 { 675 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 676 Scalar *l,*r,x,*v,*aa,*li,*ri; 677 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 678 679 ai = a->i; 680 aj = a->j; 681 aa = a->a; 682 m = a->m; 683 n = a->n; 684 bs = a->bs; 685 mbs = a->mbs; 686 bs2 = bs*bs; 687 if (ll) { 688 VecGetArray(ll,&l); VecGetSize(ll,&lm); 689 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 690 for ( i=0; i<mbs; i++ ) { /* for each block row */ 691 M = ai[i+1] - ai[i]; 692 li = l + i*bs; 693 v = aa + bs2*ai[i]; 694 for ( j=0; j<M; j++ ) { /* for each block */ 695 for ( k=0; k<bs2; k++ ) { 696 (*v++) *= li[k%bs]; 697 } 698 } 699 } 700 } 701 702 if (rr) { 703 VecGetArray(rr,&r); VecGetSize(rr,&rn); 704 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 705 for ( i=0; i<mbs; i++ ) { /* for each block row */ 706 M = ai[i+1] - ai[i]; 707 v = aa + bs2*ai[i]; 708 for ( j=0; j<M; j++ ) { /* for each block */ 709 ri = r + bs*aj[ai[i]+j]; 710 for ( k=0; k<bs; k++ ) { 711 x = ri[k]; 712 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 713 } 714 } 715 } 716 } 717 return 0; 718 } 719 720 721 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 722 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 723 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 724 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 725 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 726 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 727 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 728 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 729 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 730 731 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 732 { 733 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 734 Scalar *v = a->a; 735 double sum = 0.0; 736 int i; 737 738 if (type == NORM_FROBENIUS) { 739 for (i=0; i<a->nz; i++ ) { 740 #if defined(PETSC_COMPLEX) 741 sum += real(conj(*v)*(*v)); v++; 742 #else 743 sum += (*v)*(*v); v++; 744 #endif 745 } 746 *norm = sqrt(sum); 747 } 748 else { 749 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 750 } 751 return 0; 752 } 753 754 /* 755 note: This can only work for identity for row and col. It would 756 be good to check this and otherwise generate an error. 757 */ 758 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 761 Mat outA; 762 int ierr; 763 764 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 765 766 outA = inA; 767 inA->factor = FACTOR_LU; 768 a->row = row; 769 a->col = col; 770 771 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 772 773 if (!a->diag) { 774 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 775 } 776 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 777 return 0; 778 } 779 780 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 783 int one = 1, totalnz = a->bs*a->bs*a->nz; 784 BLscal_( &totalnz, alpha, a->a, &one ); 785 PLogFlops(totalnz); 786 return 0; 787 } 788 789 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 790 { 791 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 792 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 793 int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 794 int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 795 Scalar *ap, *aa = a->a, zero = 0.0; 796 797 for ( k=0; k<m; k++ ) { /* loop over rows */ 798 row = im[k]; brow = row/bs; 799 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 800 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 801 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 802 nrow = ailen[brow]; 803 for ( l=0; l<n; l++ ) { /* loop over columns */ 804 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 805 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 806 col = in[l] - shift; 807 bcol = col/bs; 808 cidx = col%bs; 809 ridx = row%bs; 810 high = nrow; 811 low = 0; /* assume unsorted */ 812 while (high-low > 5) { 813 t = (low+high)/2; 814 if (rp[t] > bcol) high = t; 815 else low = t; 816 } 817 for ( i=low; i<high; i++ ) { 818 if (rp[i] > bcol) break; 819 if (rp[i] == bcol) { 820 *v++ = ap[bs2*i+bs*cidx+ridx]; 821 goto finished; 822 } 823 } 824 *v++ = zero; 825 finished:; 826 } 827 } 828 return 0; 829 } 830 831 int MatPrintHelp_SeqBAIJ(Mat A) 832 { 833 static int called = 0; 834 835 if (called) return 0; else called = 1; 836 return 0; 837 } 838 /* -------------------------------------------------------------------*/ 839 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 840 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 841 MatMult_SeqBAIJ,0, 842 0,0, 843 MatSolve_SeqBAIJ,0, 844 0,0, 845 MatLUFactor_SeqBAIJ,0, 846 0, 847 0, 848 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 849 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 850 0,MatAssemblyEnd_SeqBAIJ, 851 0, 852 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 853 MatGetReordering_SeqBAIJ, 854 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 855 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 856 MatILUFactorSymbolic_SeqBAIJ,0, 857 0,0,/* MatConvert_SeqBAIJ */ 0, 858 0,0, 859 MatConvertSameType_SeqBAIJ,0,0, 860 MatILUFactor_SeqBAIJ,0,0, 861 0,0, 862 MatGetValues_SeqBAIJ,0, 863 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 864 0}; 865 866 /*@C 867 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 868 (the default parallel PETSc format). For good matrix assembly performance 869 the user should preallocate the matrix storage by setting the parameter nz 870 (or nzz). By setting these parameters accurately, performance can be 871 increased by more than a factor of 50. 872 873 Input Parameters: 874 . comm - MPI communicator, set to MPI_COMM_SELF 875 . bs - size of block 876 . m - number of rows 877 . n - number of columns 878 . nz - number of block nonzeros per block row (same for all rows) 879 . nzz - number of block nonzeros per block row or PETSC_NULL 880 (possibly different for each row) 881 882 Output Parameter: 883 . A - the matrix 884 885 Notes: 886 The BAIJ format, is fully compatible with standard Fortran 77 887 storage. That is, the stored row and column indices can begin at 888 either one (as in Fortran) or zero. See the users manual for details. 889 890 Specify the preallocated storage with either nz or nnz (not both). 891 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 892 allocation. For additional details, see the users manual chapter on 893 matrices and the file $(PETSC_DIR)/Performance. 894 895 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 896 @*/ 897 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 898 { 899 Mat B; 900 Mat_SeqBAIJ *b; 901 int i,len,ierr, flg,mbs = m/bs,bs2; 902 903 if (mbs*bs != m) 904 SETERRQ(1,"MatCreateSeqBAIJ:Number rows must be divisible by blocksize"); 905 906 *A = 0; 907 bs2 = bs*bs; 908 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 909 PLogObjectCreate(B); 910 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 911 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 912 switch (bs) { 913 case 1: 914 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 915 break; 916 case 2: 917 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 918 break; 919 case 3: 920 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 921 break; 922 case 4: 923 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 924 break; 925 case 5: 926 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 927 break; 928 } 929 B->destroy = MatDestroy_SeqBAIJ; 930 B->view = MatView_SeqBAIJ; 931 B->factor = 0; 932 B->lupivotthreshold = 1.0; 933 b->row = 0; 934 b->col = 0; 935 b->reallocs = 0; 936 937 b->m = m; 938 b->mbs = mbs; 939 b->n = n; 940 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 941 if (nnz == PETSC_NULL) { 942 if (nz == PETSC_DEFAULT) nz = CHUNKSIZE; 943 else if (nz <= 0) nz = 1; 944 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 945 nz = nz*mbs; 946 } 947 else { 948 nz = 0; 949 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 950 } 951 952 /* allocate the matrix space */ 953 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 954 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 955 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 956 b->j = (int *) (b->a + nz*bs2); 957 PetscMemzero(b->j,nz*sizeof(int)); 958 b->i = b->j + nz; 959 b->singlemalloc = 1; 960 961 b->i[0] = 0; 962 for (i=1; i<mbs+1; i++) { 963 b->i[i] = b->i[i-1] + b->imax[i-1]; 964 } 965 966 /* b->ilen will count nonzeros in each block row so far. */ 967 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 968 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 969 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 970 971 b->bs = bs; 972 b->mbs = mbs; 973 b->nz = 0; 974 b->maxnz = nz; 975 b->sorted = 0; 976 b->roworiented = 1; 977 b->nonew = 0; 978 b->diag = 0; 979 b->solve_work = 0; 980 b->mult_work = 0; 981 b->spptr = 0; 982 983 *A = B; 984 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 985 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 986 return 0; 987 } 988 989 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 990 { 991 Mat C; 992 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 993 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 994 995 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 996 997 *B = 0; 998 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 999 PLogObjectCreate(C); 1000 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1001 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1002 C->destroy = MatDestroy_SeqBAIJ; 1003 C->view = MatView_SeqBAIJ; 1004 C->factor = A->factor; 1005 c->row = 0; 1006 c->col = 0; 1007 C->assembled = PETSC_TRUE; 1008 1009 c->m = a->m; 1010 c->n = a->n; 1011 c->bs = a->bs; 1012 c->mbs = a->mbs; 1013 c->nbs = a->nbs; 1014 1015 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1016 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1017 for ( i=0; i<mbs; i++ ) { 1018 c->imax[i] = a->imax[i]; 1019 c->ilen[i] = a->ilen[i]; 1020 } 1021 1022 /* allocate the matrix space */ 1023 c->singlemalloc = 1; 1024 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1025 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1026 c->j = (int *) (c->a + nz*bs2); 1027 c->i = c->j + nz; 1028 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1029 if (mbs > 0) { 1030 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1031 if (cpvalues == COPY_VALUES) { 1032 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1033 } 1034 } 1035 1036 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1037 c->sorted = a->sorted; 1038 c->roworiented = a->roworiented; 1039 c->nonew = a->nonew; 1040 1041 if (a->diag) { 1042 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1043 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1044 for ( i=0; i<mbs; i++ ) { 1045 c->diag[i] = a->diag[i]; 1046 } 1047 } 1048 else c->diag = 0; 1049 c->nz = a->nz; 1050 c->maxnz = a->maxnz; 1051 c->solve_work = 0; 1052 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1053 c->mult_work = 0; 1054 *B = C; 1055 return 0; 1056 } 1057 1058 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1059 { 1060 Mat_SeqBAIJ *a; 1061 Mat B; 1062 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1063 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1064 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1065 int *masked, nmask,tmp,ishift,bs2; 1066 Scalar *aa; 1067 MPI_Comm comm = ((PetscObject) viewer)->comm; 1068 1069 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1070 bs2 = bs*bs; 1071 1072 MPI_Comm_size(comm,&size); 1073 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1074 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1075 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1076 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1077 M = header[1]; N = header[2]; nz = header[3]; 1078 1079 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1080 1081 /* 1082 This code adds extra rows to make sure the number of rows is 1083 divisible by the blocksize 1084 */ 1085 mbs = M/bs; 1086 extra_rows = bs - M + bs*(mbs); 1087 if (extra_rows == bs) extra_rows = 0; 1088 else mbs++; 1089 if (extra_rows) { 1090 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1091 } 1092 1093 /* read in row lengths */ 1094 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1095 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1096 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1097 1098 /* read in column indices */ 1099 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1100 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1101 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1102 1103 /* loop over row lengths determining block row lengths */ 1104 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1105 PetscMemzero(browlengths,mbs*sizeof(int)); 1106 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1107 PetscMemzero(mask,mbs*sizeof(int)); 1108 masked = mask + mbs; 1109 rowcount = 0; nzcount = 0; 1110 for ( i=0; i<mbs; i++ ) { 1111 nmask = 0; 1112 for ( j=0; j<bs; j++ ) { 1113 kmax = rowlengths[rowcount]; 1114 for ( k=0; k<kmax; k++ ) { 1115 tmp = jj[nzcount++]/bs; 1116 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1117 } 1118 rowcount++; 1119 } 1120 browlengths[i] += nmask; 1121 /* zero out the mask elements we set */ 1122 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1123 } 1124 1125 /* create our matrix */ 1126 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1127 CHKERRQ(ierr); 1128 B = *A; 1129 a = (Mat_SeqBAIJ *) B->data; 1130 1131 /* set matrix "i" values */ 1132 a->i[0] = 0; 1133 for ( i=1; i<= mbs; i++ ) { 1134 a->i[i] = a->i[i-1] + browlengths[i-1]; 1135 a->ilen[i-1] = browlengths[i-1]; 1136 } 1137 a->indexshift = 0; 1138 a->nz = 0; 1139 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1140 1141 /* read in nonzero values */ 1142 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1143 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1144 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1145 1146 /* set "a" and "j" values into matrix */ 1147 nzcount = 0; jcount = 0; 1148 for ( i=0; i<mbs; i++ ) { 1149 nzcountb = nzcount; 1150 nmask = 0; 1151 for ( j=0; j<bs; j++ ) { 1152 kmax = rowlengths[i*bs+j]; 1153 for ( k=0; k<kmax; k++ ) { 1154 tmp = jj[nzcount++]/bs; 1155 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1156 } 1157 rowcount++; 1158 } 1159 /* sort the masked values */ 1160 PetscSortInt(nmask,masked); 1161 1162 /* set "j" values into matrix */ 1163 maskcount = 1; 1164 for ( j=0; j<nmask; j++ ) { 1165 a->j[jcount++] = masked[j]; 1166 mask[masked[j]] = maskcount++; 1167 } 1168 /* set "a" values into matrix */ 1169 ishift = bs2*a->i[i]; 1170 for ( j=0; j<bs; j++ ) { 1171 kmax = rowlengths[i*bs+j]; 1172 for ( k=0; k<kmax; k++ ) { 1173 tmp = jj[nzcountb]/bs ; 1174 block = mask[tmp] - 1; 1175 point = jj[nzcountb] - bs*tmp; 1176 idx = ishift + bs2*block + j + bs*point; 1177 a->a[idx] = aa[nzcountb++]; 1178 } 1179 } 1180 /* zero out the mask elements we set */ 1181 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1182 } 1183 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1184 1185 PetscFree(rowlengths); 1186 PetscFree(browlengths); 1187 PetscFree(aa); 1188 PetscFree(jj); 1189 PetscFree(mask); 1190 1191 B->assembled = PETSC_TRUE; 1192 1193 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1194 if (flg) { 1195 Viewer tviewer; 1196 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1197 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1198 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1199 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1200 } 1201 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1202 if (flg) { 1203 Viewer tviewer; 1204 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1205 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1206 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1207 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1208 } 1209 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1210 if (flg) { 1211 Viewer tviewer; 1212 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1213 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1214 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1215 } 1216 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1217 if (flg) { 1218 Viewer tviewer; 1219 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1220 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1221 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1222 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1223 } 1224 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1225 if (flg) { 1226 Viewer tviewer; 1227 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1228 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1229 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1230 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1231 } 1232 return 0; 1233 } 1234 1235 1236 1237