1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.29 1996/04/06 00:00:43 balay Exp curfman $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else if (format == ASCII_FORMAT_COMMON) { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else { 205 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 206 } 207 #else 208 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 209 #endif 210 } 211 } 212 fprintf(fd,"\n"); 213 } 214 } 215 } 216 fflush(fd); 217 return 0; 218 } 219 220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 221 { 222 Mat A = (Mat) obj; 223 ViewerType vtype; 224 int ierr; 225 226 if (!viewer) { 227 viewer = STDOUT_VIEWER_SELF; 228 } 229 230 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 231 if (vtype == MATLAB_VIEWER) { 232 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 233 } 234 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 235 return MatView_SeqBAIJ_ASCII(A,viewer); 236 } 237 else if (vtype == BINARY_FILE_VIEWER) { 238 return MatView_SeqBAIJ_Binary(A,viewer); 239 } 240 else if (vtype == DRAW_VIEWER) { 241 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 242 } 243 return 0; 244 } 245 246 #define CHUNKSIZE 1 247 248 /* This version has row oriented v */ 249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 250 { 251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 252 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 253 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 254 int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 255 int ridx,cidx,bs2=bs*bs; 256 Scalar *ap,value,*aa=a->a,*bap; 257 258 for ( k=0; k<m; k++ ) { /* loop over added rows */ 259 row = im[k]; brow = row/bs; 260 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 261 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 262 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 263 rmax = imax[brow]; nrow = ailen[brow]; 264 low = 0; 265 for ( l=0; l<n; l++ ) { /* loop over added columns */ 266 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 267 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 268 col = in[l] - shift; bcol = col/bs; 269 ridx = row % bs; cidx = col % bs; 270 if (roworiented) { 271 value = *v++; 272 } 273 else { 274 value = v[k + l*m]; 275 } 276 if (!sorted) low = 0; high = nrow; 277 while (high-low > 5) { 278 t = (low+high)/2; 279 if (rp[t] > bcol) high = t; 280 else low = t; 281 } 282 for ( i=low; i<high; i++ ) { 283 if (rp[i] > bcol) break; 284 if (rp[i] == bcol) { 285 bap = ap + bs2*i + bs*cidx + ridx; 286 if (is == ADD_VALUES) *bap += value; 287 else *bap = value; 288 goto noinsert; 289 } 290 } 291 if (nonew) goto noinsert; 292 if (nrow >= rmax) { 293 /* there is no extra room in row, therefore enlarge */ 294 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 295 Scalar *new_a; 296 297 /* malloc new storage space */ 298 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 299 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 300 new_j = (int *) (new_a + bs2*new_nz); 301 new_i = new_j + new_nz; 302 303 /* copy over old data into new slots */ 304 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 305 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 306 PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 307 len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 308 PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 309 len*sizeof(int)); 310 PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 311 PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 312 PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 313 aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 314 /* free up old matrix storage */ 315 PetscFree(a->a); 316 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 317 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 318 a->singlemalloc = 1; 319 320 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 321 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 322 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 323 a->maxnz += bs2*CHUNKSIZE; 324 a->reallocs++; 325 a->nz+=bs2; 326 } 327 N = nrow++ - 1; 328 /* shift up all the later entries in this row */ 329 for ( ii=N; ii>=i; ii-- ) { 330 rp[ii+1] = rp[ii]; 331 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 332 } 333 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 334 rp[i] = bcol; 335 ap[bs2*i + bs*cidx + ridx] = value; 336 noinsert:; 337 low = i; 338 } 339 ailen[brow] = nrow; 340 } 341 return 0; 342 } 343 344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 345 { 346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 347 *m = a->m; *n = a->n; 348 return 0; 349 } 350 351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 352 { 353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 354 *m = 0; *n = a->m; 355 return 0; 356 } 357 358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 359 { 360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 361 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 362 Scalar *aa,*v_i,*aa_i; 363 364 bs = a->bs; 365 ai = a->i; 366 aj = a->j; 367 aa = a->a; 368 bs2 = bs*bs; 369 370 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 371 372 bn = row/bs; /* Block number */ 373 bp = row % bs; /* Block Position */ 374 M = ai[bn+1] - ai[bn]; 375 *nz = bs*M; 376 377 if (v) { 378 *v = 0; 379 if (*nz) { 380 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 381 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 382 v_i = *v + i*bs; 383 aa_i = aa + bs2*(ai[bn] + i); 384 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 385 } 386 } 387 } 388 389 if (idx) { 390 *idx = 0; 391 if (*nz) { 392 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 393 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 394 idx_i = *idx + i*bs; 395 itmp = bs*aj[ai[bn] + i]; 396 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 397 } 398 } 399 } 400 return 0; 401 } 402 403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 404 { 405 if (idx) {if (*idx) PetscFree(*idx);} 406 if (v) {if (*v) PetscFree(*v);} 407 return 0; 408 } 409 410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 411 { 412 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 413 Mat C; 414 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 415 int shift=a->indexshift,*rows,*cols,bs2=bs*bs; 416 Scalar *array=a->a; 417 418 if (B==PETSC_NULL && mbs!=nbs) 419 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 420 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 421 PetscMemzero(col,(1+nbs)*sizeof(int)); 422 if (shift) { 423 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1; 424 } 425 for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1; 426 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 427 PetscFree(col); 428 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 429 cols = rows + bs; 430 for ( i=0; i<mbs; i++ ) { 431 cols[0] = i*bs; 432 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 433 len = ai[i+1] - ai[i]; 434 for ( j=0; j<len; j++ ) { 435 rows[0] = (*aj++)*bs; 436 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 437 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 438 array += bs2; 439 } 440 } 441 PetscFree(rows); 442 if (shift) { 443 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1; 444 } 445 446 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 447 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 448 449 if (B != PETSC_NULL) { 450 *B = C; 451 } else { 452 /* This isn't really an in-place transpose */ 453 PetscFree(a->a); 454 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 455 if (a->diag) PetscFree(a->diag); 456 if (a->ilen) PetscFree(a->ilen); 457 if (a->imax) PetscFree(a->imax); 458 if (a->solve_work) PetscFree(a->solve_work); 459 PetscFree(a); 460 PetscMemcpy(A,C,sizeof(struct _Mat)); 461 PetscHeaderDestroy(C); 462 } 463 return 0; 464 } 465 466 467 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 468 { 469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 470 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 471 int m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift; 472 int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 473 Scalar *aa = a->a, *ap; 474 475 if (mode == FLUSH_ASSEMBLY) return 0; 476 477 for ( i=1; i<mbs; i++ ) { 478 /* move each row back by the amount of empty slots (fshift) before it*/ 479 fshift += imax[i-1] - ailen[i-1]; 480 if (fshift) { 481 ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 482 N = ailen[i]; 483 for ( j=0; j<N; j++ ) { 484 ip[j-fshift] = ip[j]; 485 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 486 } 487 } 488 ai[i] = ai[i-1] + ailen[i-1]; 489 } 490 if (mbs) { 491 fshift += imax[mbs-1] - ailen[mbs-1]; 492 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 493 } 494 /* reset ilen and imax for each row */ 495 for ( i=0; i<mbs; i++ ) { 496 ailen[i] = imax[i] = ai[i+1] - ai[i]; 497 } 498 a->nz = (ai[m] + shift)*bs2; 499 500 /* diagonals may have moved, so kill the diagonal pointers */ 501 if (fshift && a->diag) { 502 PetscFree(a->diag); 503 PLogObjectMemory(A,-(m+1)*sizeof(int)); 504 a->diag = 0; 505 } 506 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 507 fshift,a->nz,m); 508 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 509 a->reallocs); 510 return 0; 511 } 512 513 static int MatZeroEntries_SeqBAIJ(Mat A) 514 { 515 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 516 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 517 return 0; 518 } 519 520 int MatDestroy_SeqBAIJ(PetscObject obj) 521 { 522 Mat A = (Mat) obj; 523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 524 525 #if defined(PETSC_LOG) 526 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 527 #endif 528 PetscFree(a->a); 529 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 530 if (a->diag) PetscFree(a->diag); 531 if (a->ilen) PetscFree(a->ilen); 532 if (a->imax) PetscFree(a->imax); 533 if (a->solve_work) PetscFree(a->solve_work); 534 if (a->mult_work) PetscFree(a->mult_work); 535 PetscFree(a); 536 PLogObjectDestroy(A); 537 PetscHeaderDestroy(A); 538 return 0; 539 } 540 541 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 544 if (op == ROW_ORIENTED) a->roworiented = 1; 545 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 546 else if (op == COLUMNS_SORTED) a->sorted = 1; 547 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 548 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 549 else if (op == ROWS_SORTED || 550 op == SYMMETRIC_MATRIX || 551 op == STRUCTURALLY_SYMMETRIC_MATRIX || 552 op == YES_NEW_DIAGONALS) 553 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 554 else if (op == NO_NEW_DIAGONALS) 555 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 556 else 557 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 558 return 0; 559 } 560 561 562 /* -------------------------------------------------------*/ 563 /* Should check that shapes of vectors and matrices match */ 564 /* -------------------------------------------------------*/ 565 #include "pinclude/plapack.h" 566 567 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 568 { 569 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 570 Scalar *xg,*yg; 571 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 572 register Scalar x1,x2,x3,x4,x5; 573 int mbs = a->mbs, m = a->m, i, *idx,*ii; 574 int bs = a->bs,j,n,bs2 = bs*bs; 575 576 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 577 PetscMemzero(y,m*sizeof(Scalar)); 578 x = x; 579 idx = a->j; 580 v = a->a; 581 ii = a->i; 582 583 switch (bs) { 584 case 1: 585 for ( i=0; i<m; i++ ) { 586 n = ii[1] - ii[0]; ii++; 587 sum = 0.0; 588 while (n--) sum += *v++ * x[*idx++]; 589 y[i] = sum; 590 } 591 break; 592 case 2: 593 for ( i=0; i<mbs; i++ ) { 594 n = ii[1] - ii[0]; ii++; 595 sum1 = 0.0; sum2 = 0.0; 596 for ( j=0; j<n; j++ ) { 597 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 598 sum1 += v[0]*x1 + v[2]*x2; 599 sum2 += v[1]*x1 + v[3]*x2; 600 v += 4; 601 } 602 y[0] += sum1; y[1] += sum2; 603 y += 2; 604 } 605 break; 606 case 3: 607 for ( i=0; i<mbs; i++ ) { 608 n = ii[1] - ii[0]; ii++; 609 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 610 for ( j=0; j<n; j++ ) { 611 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 612 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 613 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 614 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 615 v += 9; 616 } 617 y[0] += sum1; y[1] += sum2; y[2] += sum3; 618 y += 3; 619 } 620 break; 621 case 4: 622 for ( i=0; i<mbs; i++ ) { 623 n = ii[1] - ii[0]; ii++; 624 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 625 for ( j=0; j<n; j++ ) { 626 xb = x + 4*(*idx++); 627 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 628 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 629 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 630 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 631 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 632 v += 16; 633 } 634 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 635 y += 4; 636 } 637 break; 638 case 5: 639 for ( i=0; i<mbs; i++ ) { 640 n = ii[1] - ii[0]; ii++; 641 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 642 for ( j=0; j<n; j++ ) { 643 xb = x + 5*(*idx++); 644 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 645 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 646 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 647 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 648 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 649 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 650 v += 25; 651 } 652 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 653 y += 5; 654 } 655 break; 656 /* block sizes larger then 5 by 5 are handled by BLAS */ 657 default: { 658 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 659 if (!a->mult_work) { 660 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 661 CHKPTRQ(a->mult_work); 662 } 663 work = a->mult_work; 664 for ( i=0; i<mbs; i++ ) { 665 n = ii[1] - ii[0]; ii++; 666 ncols = n*bs; 667 workt = work; 668 for ( j=0; j<n; j++ ) { 669 xb = x + bs*(*idx++); 670 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 671 workt += bs; 672 } 673 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 674 v += n*bs2; 675 y += bs; 676 } 677 } 678 } 679 PLogFlops(2*bs2*a->nz - m); 680 return 0; 681 } 682 683 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 684 { 685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 686 if (nz) *nz = a->bs*a->bs*a->nz; 687 if (nza) *nza = a->maxnz; 688 if (mem) *mem = (int)A->mem; 689 return 0; 690 } 691 692 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 693 { 694 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 695 696 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 697 698 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 699 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 700 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 701 *flg = PETSC_FALSE; return 0; 702 } 703 704 /* if the a->i are the same */ 705 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 706 *flg = PETSC_FALSE; return 0; 707 } 708 709 /* if a->j are the same */ 710 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 711 *flg = PETSC_FALSE; return 0; 712 } 713 714 /* if a->a are the same */ 715 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 716 *flg = PETSC_FALSE; return 0; 717 } 718 *flg = PETSC_TRUE; 719 return 0; 720 721 } 722 723 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 724 { 725 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 726 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 727 Scalar *x, zero = 0.0,*aa,*aa_j; 728 729 bs = a->bs; 730 aa = a->a; 731 ai = a->i; 732 aj = a->j; 733 ambs = a->mbs; 734 bs2 = bs*bs; 735 736 VecSet(&zero,v); 737 VecGetArray(v,&x); VecGetLocalSize(v,&n); 738 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 739 for ( i=0; i<ambs; i++ ) { 740 for ( j=ai[i]; j<ai[i+1]; j++ ) { 741 if (aj[j] == i) { 742 row = i*bs; 743 aa_j = aa+j*bs2; 744 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 745 break; 746 } 747 } 748 } 749 return 0; 750 } 751 752 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 753 { 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 755 Scalar *l,*r,x,*v,*aa,*li,*ri; 756 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 757 758 ai = a->i; 759 aj = a->j; 760 aa = a->a; 761 m = a->m; 762 n = a->n; 763 bs = a->bs; 764 mbs = a->mbs; 765 bs2 = bs*bs; 766 if (ll) { 767 VecGetArray(ll,&l); VecGetSize(ll,&lm); 768 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 769 for ( i=0; i<mbs; i++ ) { /* for each block row */ 770 M = ai[i+1] - ai[i]; 771 li = l + i*bs; 772 v = aa + bs2*ai[i]; 773 for ( j=0; j<M; j++ ) { /* for each block */ 774 for ( k=0; k<bs2; k++ ) { 775 (*v++) *= li[k%bs]; 776 } 777 } 778 } 779 } 780 781 if (rr) { 782 VecGetArray(rr,&r); VecGetSize(rr,&rn); 783 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 784 for ( i=0; i<mbs; i++ ) { /* for each block row */ 785 M = ai[i+1] - ai[i]; 786 v = aa + bs2*ai[i]; 787 for ( j=0; j<M; j++ ) { /* for each block */ 788 ri = r + bs*aj[ai[i]+j]; 789 for ( k=0; k<bs; k++ ) { 790 x = ri[k]; 791 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 792 } 793 } 794 } 795 } 796 return 0; 797 } 798 799 800 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 801 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 802 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 803 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 804 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 805 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 806 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 807 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 808 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 809 810 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 813 Scalar *v = a->a; 814 double sum = 0.0; 815 int i; 816 817 if (type == NORM_FROBENIUS) { 818 for (i=0; i<a->nz; i++ ) { 819 #if defined(PETSC_COMPLEX) 820 sum += real(conj(*v)*(*v)); v++; 821 #else 822 sum += (*v)*(*v); v++; 823 #endif 824 } 825 *norm = sqrt(sum); 826 } 827 else { 828 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 829 } 830 return 0; 831 } 832 833 /* 834 note: This can only work for identity for row and col. It would 835 be good to check this and otherwise generate an error. 836 */ 837 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 840 Mat outA; 841 int ierr; 842 843 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 844 845 outA = inA; 846 inA->factor = FACTOR_LU; 847 a->row = row; 848 a->col = col; 849 850 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 851 852 if (!a->diag) { 853 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 854 } 855 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 856 return 0; 857 } 858 859 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 860 { 861 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 862 int one = 1, totalnz = a->bs*a->bs*a->nz; 863 BLscal_( &totalnz, alpha, a->a, &one ); 864 PLogFlops(totalnz); 865 return 0; 866 } 867 868 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 869 { 870 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 871 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 872 int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 873 int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 874 Scalar *ap, *aa = a->a, zero = 0.0; 875 876 for ( k=0; k<m; k++ ) { /* loop over rows */ 877 row = im[k]; brow = row/bs; 878 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 879 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 880 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 881 nrow = ailen[brow]; 882 for ( l=0; l<n; l++ ) { /* loop over columns */ 883 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 884 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 885 col = in[l] - shift; 886 bcol = col/bs; 887 cidx = col%bs; 888 ridx = row%bs; 889 high = nrow; 890 low = 0; /* assume unsorted */ 891 while (high-low > 5) { 892 t = (low+high)/2; 893 if (rp[t] > bcol) high = t; 894 else low = t; 895 } 896 for ( i=low; i<high; i++ ) { 897 if (rp[i] > bcol) break; 898 if (rp[i] == bcol) { 899 *v++ = ap[bs2*i+bs*cidx+ridx]; 900 goto finished; 901 } 902 } 903 *v++ = zero; 904 finished:; 905 } 906 } 907 return 0; 908 } 909 910 int MatPrintHelp_SeqBAIJ(Mat A) 911 { 912 static int called = 0; 913 914 if (called) return 0; else called = 1; 915 return 0; 916 } 917 /* -------------------------------------------------------------------*/ 918 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 919 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 920 MatMult_SeqBAIJ,0, 921 0,0, 922 MatSolve_SeqBAIJ,0, 923 0,0, 924 MatLUFactor_SeqBAIJ,0, 925 0, 926 MatTranspose_SeqBAIJ, 927 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 928 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 929 0,MatAssemblyEnd_SeqBAIJ, 930 0, 931 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 932 MatGetReordering_SeqBAIJ, 933 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 934 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 935 MatILUFactorSymbolic_SeqBAIJ,0, 936 0,0,/* MatConvert_SeqBAIJ */ 0, 937 0,0, 938 MatConvertSameType_SeqBAIJ,0,0, 939 MatILUFactor_SeqBAIJ,0,0, 940 0,0, 941 MatGetValues_SeqBAIJ,0, 942 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 943 0}; 944 945 /*@C 946 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 947 compressed row) format. For good matrix assembly performance the 948 user should preallocate the matrix storage by setting the parameter nz 949 (or nzz). By setting these parameters accurately, performance can be 950 increased by more than a factor of 50. 951 952 Input Parameters: 953 . comm - MPI communicator, set to MPI_COMM_SELF 954 . bs - size of block 955 . m - number of rows 956 . n - number of columns 957 . nz - number of block nonzeros per block row (same for all rows) 958 . nzz - number of block nonzeros per block row or PETSC_NULL 959 (possibly different for each row) 960 961 Output Parameter: 962 . A - the matrix 963 964 Notes: 965 The block AIJ format is fully compatible with standard Fortran 77 966 storage. That is, the stored row and column indices can begin at 967 either one (as in Fortran) or zero. See the users' manual for details. 968 969 Specify the preallocated storage with either nz or nnz (not both). 970 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 971 allocation. For additional details, see the users manual chapter on 972 matrices and the file $(PETSC_DIR)/Performance. 973 974 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 975 @*/ 976 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 977 { 978 Mat B; 979 Mat_SeqBAIJ *b; 980 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 981 982 if (mbs*bs!=m || nbs*bs!=n) 983 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 984 985 *A = 0; 986 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 987 PLogObjectCreate(B); 988 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 989 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 990 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 991 switch (bs) { 992 case 1: 993 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 994 break; 995 case 2: 996 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 997 break; 998 case 3: 999 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1000 break; 1001 case 4: 1002 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1003 break; 1004 case 5: 1005 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1006 break; 1007 } 1008 B->destroy = MatDestroy_SeqBAIJ; 1009 B->view = MatView_SeqBAIJ; 1010 B->factor = 0; 1011 B->lupivotthreshold = 1.0; 1012 b->row = 0; 1013 b->col = 0; 1014 b->reallocs = 0; 1015 1016 b->m = m; B->m = m; B->M = m; 1017 b->n = n; B->n = n; B->N = n; 1018 b->mbs = mbs; 1019 b->nbs = nbs; 1020 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1021 if (nnz == PETSC_NULL) { 1022 if (nz == PETSC_DEFAULT) nz = CHUNKSIZE; 1023 else if (nz <= 0) nz = 1; 1024 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1025 nz = nz*mbs; 1026 } 1027 else { 1028 nz = 0; 1029 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1030 } 1031 1032 /* allocate the matrix space */ 1033 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1034 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1035 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1036 b->j = (int *) (b->a + nz*bs2); 1037 PetscMemzero(b->j,nz*sizeof(int)); 1038 b->i = b->j + nz; 1039 b->singlemalloc = 1; 1040 1041 b->i[0] = 0; 1042 for (i=1; i<mbs+1; i++) { 1043 b->i[i] = b->i[i-1] + b->imax[i-1]; 1044 } 1045 1046 /* b->ilen will count nonzeros in each block row so far. */ 1047 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1048 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1049 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1050 1051 b->bs = bs; 1052 b->mbs = mbs; 1053 b->nz = 0; 1054 b->maxnz = nz; 1055 b->sorted = 0; 1056 b->roworiented = 1; 1057 b->nonew = 0; 1058 b->diag = 0; 1059 b->solve_work = 0; 1060 b->mult_work = 0; 1061 b->spptr = 0; 1062 b->indexshift = 0; 1063 *A = B; 1064 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1065 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1066 return 0; 1067 } 1068 1069 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1070 { 1071 Mat C; 1072 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1073 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 1074 1075 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1076 1077 *B = 0; 1078 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1079 PLogObjectCreate(C); 1080 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1081 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1082 C->destroy = MatDestroy_SeqBAIJ; 1083 C->view = MatView_SeqBAIJ; 1084 C->factor = A->factor; 1085 c->row = 0; 1086 c->col = 0; 1087 C->assembled = PETSC_TRUE; 1088 1089 c->m = C->m = a->m; 1090 c->n = C->n = a->n; 1091 C->M = a->m; 1092 C->N = a->n; 1093 1094 c->bs = a->bs; 1095 c->mbs = a->mbs; 1096 c->nbs = a->nbs; 1097 c->indexshift = 0; 1098 1099 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1100 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1101 for ( i=0; i<mbs; i++ ) { 1102 c->imax[i] = a->imax[i]; 1103 c->ilen[i] = a->ilen[i]; 1104 } 1105 1106 /* allocate the matrix space */ 1107 c->singlemalloc = 1; 1108 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1109 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1110 c->j = (int *) (c->a + nz*bs2); 1111 c->i = c->j + nz; 1112 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1113 if (mbs > 0) { 1114 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1115 if (cpvalues == COPY_VALUES) { 1116 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1117 } 1118 } 1119 1120 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1121 c->sorted = a->sorted; 1122 c->roworiented = a->roworiented; 1123 c->nonew = a->nonew; 1124 1125 if (a->diag) { 1126 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1127 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1128 for ( i=0; i<mbs; i++ ) { 1129 c->diag[i] = a->diag[i]; 1130 } 1131 } 1132 else c->diag = 0; 1133 c->nz = a->nz; 1134 c->maxnz = a->maxnz; 1135 c->solve_work = 0; 1136 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1137 c->mult_work = 0; 1138 *B = C; 1139 return 0; 1140 } 1141 1142 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1143 { 1144 Mat_SeqBAIJ *a; 1145 Mat B; 1146 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1147 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1148 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1149 int *masked, nmask,tmp,ishift,bs2; 1150 Scalar *aa; 1151 MPI_Comm comm = ((PetscObject) viewer)->comm; 1152 1153 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1154 bs2 = bs*bs; 1155 1156 MPI_Comm_size(comm,&size); 1157 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1158 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1159 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1160 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1161 M = header[1]; N = header[2]; nz = header[3]; 1162 1163 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1164 1165 /* 1166 This code adds extra rows to make sure the number of rows is 1167 divisible by the blocksize 1168 */ 1169 mbs = M/bs; 1170 extra_rows = bs - M + bs*(mbs); 1171 if (extra_rows == bs) extra_rows = 0; 1172 else mbs++; 1173 if (extra_rows) { 1174 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1175 } 1176 1177 /* read in row lengths */ 1178 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1179 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1180 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1181 1182 /* read in column indices */ 1183 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1184 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1185 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1186 1187 /* loop over row lengths determining block row lengths */ 1188 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1189 PetscMemzero(browlengths,mbs*sizeof(int)); 1190 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1191 PetscMemzero(mask,mbs*sizeof(int)); 1192 masked = mask + mbs; 1193 rowcount = 0; nzcount = 0; 1194 for ( i=0; i<mbs; i++ ) { 1195 nmask = 0; 1196 for ( j=0; j<bs; j++ ) { 1197 kmax = rowlengths[rowcount]; 1198 for ( k=0; k<kmax; k++ ) { 1199 tmp = jj[nzcount++]/bs; 1200 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1201 } 1202 rowcount++; 1203 } 1204 browlengths[i] += nmask; 1205 /* zero out the mask elements we set */ 1206 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1207 } 1208 1209 /* create our matrix */ 1210 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1211 CHKERRQ(ierr); 1212 B = *A; 1213 a = (Mat_SeqBAIJ *) B->data; 1214 1215 /* set matrix "i" values */ 1216 a->i[0] = 0; 1217 for ( i=1; i<= mbs; i++ ) { 1218 a->i[i] = a->i[i-1] + browlengths[i-1]; 1219 a->ilen[i-1] = browlengths[i-1]; 1220 } 1221 a->indexshift = 0; 1222 a->nz = 0; 1223 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1224 1225 /* read in nonzero values */ 1226 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1227 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1228 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1229 1230 /* set "a" and "j" values into matrix */ 1231 nzcount = 0; jcount = 0; 1232 for ( i=0; i<mbs; i++ ) { 1233 nzcountb = nzcount; 1234 nmask = 0; 1235 for ( j=0; j<bs; j++ ) { 1236 kmax = rowlengths[i*bs+j]; 1237 for ( k=0; k<kmax; k++ ) { 1238 tmp = jj[nzcount++]/bs; 1239 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1240 } 1241 rowcount++; 1242 } 1243 /* sort the masked values */ 1244 PetscSortInt(nmask,masked); 1245 1246 /* set "j" values into matrix */ 1247 maskcount = 1; 1248 for ( j=0; j<nmask; j++ ) { 1249 a->j[jcount++] = masked[j]; 1250 mask[masked[j]] = maskcount++; 1251 } 1252 /* set "a" values into matrix */ 1253 ishift = bs2*a->i[i]; 1254 for ( j=0; j<bs; j++ ) { 1255 kmax = rowlengths[i*bs+j]; 1256 for ( k=0; k<kmax; k++ ) { 1257 tmp = jj[nzcountb]/bs ; 1258 block = mask[tmp] - 1; 1259 point = jj[nzcountb] - bs*tmp; 1260 idx = ishift + bs2*block + j + bs*point; 1261 a->a[idx] = aa[nzcountb++]; 1262 } 1263 } 1264 /* zero out the mask elements we set */ 1265 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1266 } 1267 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1268 1269 PetscFree(rowlengths); 1270 PetscFree(browlengths); 1271 PetscFree(aa); 1272 PetscFree(jj); 1273 PetscFree(mask); 1274 1275 B->assembled = PETSC_TRUE; 1276 1277 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1278 if (flg) { 1279 Viewer tviewer; 1280 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1281 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1282 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1283 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1284 } 1285 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1286 if (flg) { 1287 Viewer tviewer; 1288 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1289 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1290 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1291 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1292 } 1293 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1294 if (flg) { 1295 Viewer tviewer; 1296 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1297 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1298 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1299 } 1300 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1301 if (flg) { 1302 Viewer tviewer; 1303 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1304 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1305 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1306 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1307 } 1308 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1309 if (flg) { 1310 Viewer tviewer; 1311 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1312 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1313 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1314 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1315 } 1316 return 0; 1317 } 1318 1319 1320 1321