1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.32 1996/04/09 02:23:33 curfman Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 #define max(a,b) (a>b ? a:b) 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 166 fprintf(fd," block size is %d\n",bs); 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else if (format == ASCII_FORMAT_COMMON) { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else { 205 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 206 } 207 #else 208 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 209 #endif 210 } 211 } 212 fprintf(fd,"\n"); 213 } 214 } 215 } 216 fflush(fd); 217 return 0; 218 } 219 220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 221 { 222 Mat A = (Mat) obj; 223 ViewerType vtype; 224 int ierr; 225 226 if (!viewer) { 227 viewer = STDOUT_VIEWER_SELF; 228 } 229 230 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 231 if (vtype == MATLAB_VIEWER) { 232 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 233 } 234 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 235 return MatView_SeqBAIJ_ASCII(A,viewer); 236 } 237 else if (vtype == BINARY_FILE_VIEWER) { 238 return MatView_SeqBAIJ_Binary(A,viewer); 239 } 240 else if (vtype == DRAW_VIEWER) { 241 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 242 } 243 return 0; 244 } 245 246 #define CHUNKSIZE 10 247 248 /* This version has row oriented v */ 249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 250 { 251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 252 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 253 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 254 int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 255 int ridx,cidx,bs2=bs*bs; 256 Scalar *ap,value,*aa=a->a,*bap; 257 258 for ( k=0; k<m; k++ ) { /* loop over added rows */ 259 row = im[k]; brow = row/bs; 260 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 261 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 262 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 263 rmax = imax[brow]; nrow = ailen[brow]; 264 low = 0; 265 for ( l=0; l<n; l++ ) { /* loop over added columns */ 266 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 267 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 268 col = in[l] - shift; bcol = col/bs; 269 ridx = row % bs; cidx = col % bs; 270 if (roworiented) { 271 value = *v++; 272 } 273 else { 274 value = v[k + l*m]; 275 } 276 if (!sorted) low = 0; high = nrow; 277 while (high-low > 5) { 278 t = (low+high)/2; 279 if (rp[t] > bcol) high = t; 280 else low = t; 281 } 282 for ( i=low; i<high; i++ ) { 283 if (rp[i] > bcol) break; 284 if (rp[i] == bcol) { 285 bap = ap + bs2*i + bs*cidx + ridx; 286 if (is == ADD_VALUES) *bap += value; 287 else *bap = value; 288 goto noinsert; 289 } 290 } 291 if (nonew) goto noinsert; 292 if (nrow >= rmax) { 293 /* there is no extra room in row, therefore enlarge */ 294 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 295 Scalar *new_a; 296 297 /* malloc new storage space */ 298 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 299 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 300 new_j = (int *) (new_a + bs2*new_nz); 301 new_i = new_j + new_nz; 302 303 /* copy over old data into new slots */ 304 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 305 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 306 PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 307 len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 308 PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 309 len*sizeof(int)); 310 PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 311 PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 312 PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 313 aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 314 /* free up old matrix storage */ 315 PetscFree(a->a); 316 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 317 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 318 a->singlemalloc = 1; 319 320 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 321 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 322 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 323 a->maxnz += CHUNKSIZE; 324 a->reallocs++; 325 a->nz++; 326 } 327 N = nrow++ - 1; 328 /* shift up all the later entries in this row */ 329 for ( ii=N; ii>=i; ii-- ) { 330 rp[ii+1] = rp[ii]; 331 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 332 } 333 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 334 rp[i] = bcol; 335 ap[bs2*i + bs*cidx + ridx] = value; 336 noinsert:; 337 low = i; 338 } 339 ailen[brow] = nrow; 340 } 341 return 0; 342 } 343 344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 345 { 346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 347 *m = a->m; *n = a->n; 348 return 0; 349 } 350 351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 352 { 353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 354 *m = 0; *n = a->m; 355 return 0; 356 } 357 358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 359 { 360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 361 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 362 Scalar *aa,*v_i,*aa_i; 363 364 bs = a->bs; 365 ai = a->i; 366 aj = a->j; 367 aa = a->a; 368 bs2 = bs*bs; 369 370 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 371 372 bn = row/bs; /* Block number */ 373 bp = row % bs; /* Block Position */ 374 M = ai[bn+1] - ai[bn]; 375 *nz = bs*M; 376 377 if (v) { 378 *v = 0; 379 if (*nz) { 380 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 381 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 382 v_i = *v + i*bs; 383 aa_i = aa + bs2*(ai[bn] + i); 384 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 385 } 386 } 387 } 388 389 if (idx) { 390 *idx = 0; 391 if (*nz) { 392 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 393 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 394 idx_i = *idx + i*bs; 395 itmp = bs*aj[ai[bn] + i]; 396 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 397 } 398 } 399 } 400 return 0; 401 } 402 403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 404 { 405 if (idx) {if (*idx) PetscFree(*idx);} 406 if (v) {if (*v) PetscFree(*v);} 407 return 0; 408 } 409 410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 411 { 412 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 413 Mat C; 414 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 415 int shift=a->indexshift,*rows,*cols,bs2=bs*bs; 416 Scalar *array=a->a; 417 418 if (B==PETSC_NULL && mbs!=nbs) 419 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 420 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 421 PetscMemzero(col,(1+nbs)*sizeof(int)); 422 if (shift) { 423 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1; 424 } 425 for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1; 426 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 427 PetscFree(col); 428 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 429 cols = rows + bs; 430 for ( i=0; i<mbs; i++ ) { 431 cols[0] = i*bs; 432 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 433 len = ai[i+1] - ai[i]; 434 for ( j=0; j<len; j++ ) { 435 rows[0] = (*aj++)*bs; 436 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 437 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 438 array += bs2; 439 } 440 } 441 PetscFree(rows); 442 if (shift) { 443 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1; 444 } 445 446 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 447 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 448 449 if (B != PETSC_NULL) { 450 *B = C; 451 } else { 452 /* This isn't really an in-place transpose */ 453 PetscFree(a->a); 454 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 455 if (a->diag) PetscFree(a->diag); 456 if (a->ilen) PetscFree(a->ilen); 457 if (a->imax) PetscFree(a->imax); 458 if (a->solve_work) PetscFree(a->solve_work); 459 PetscFree(a); 460 PetscMemcpy(A,C,sizeof(struct _Mat)); 461 PetscHeaderDestroy(C); 462 } 463 return 0; 464 } 465 466 467 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 468 { 469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 470 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 471 int m = a->m,*ip, N, *ailen = a->ilen,shift = a->indexshift; 472 int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 473 Scalar *aa = a->a, *ap; 474 475 if (mode == FLUSH_ASSEMBLY) return 0; 476 477 for ( i=1; i<mbs; i++ ) { 478 /* move each row back by the amount of empty slots (fshift) before it*/ 479 fshift += imax[i-1] - ailen[i-1]; 480 if (fshift) { 481 ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 482 N = ailen[i]; 483 for ( j=0; j<N; j++ ) { 484 ip[j-fshift] = ip[j]; 485 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 486 } 487 } 488 ai[i] = ai[i-1] + ailen[i-1]; 489 } 490 if (mbs) { 491 fshift += imax[mbs-1] - ailen[mbs-1]; 492 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 493 } 494 /* reset ilen and imax for each row */ 495 for ( i=0; i<mbs; i++ ) { 496 ailen[i] = imax[i] = ai[i+1] - ai[i]; 497 } 498 a->nz = ai[mbs] + shift; 499 500 /* diagonals may have moved, so kill the diagonal pointers */ 501 if (fshift && a->diag) { 502 PetscFree(a->diag); 503 PLogObjectMemory(A,-(m+1)*sizeof(int)); 504 a->diag = 0; 505 } 506 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 507 fshift,a->nz,m); 508 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 509 a->reallocs); 510 return 0; 511 } 512 513 static int MatZeroEntries_SeqBAIJ(Mat A) 514 { 515 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 516 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 517 return 0; 518 } 519 520 int MatDestroy_SeqBAIJ(PetscObject obj) 521 { 522 Mat A = (Mat) obj; 523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 524 525 #if defined(PETSC_LOG) 526 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 527 #endif 528 PetscFree(a->a); 529 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 530 if (a->diag) PetscFree(a->diag); 531 if (a->ilen) PetscFree(a->ilen); 532 if (a->imax) PetscFree(a->imax); 533 if (a->solve_work) PetscFree(a->solve_work); 534 if (a->mult_work) PetscFree(a->mult_work); 535 PetscFree(a); 536 PLogObjectDestroy(A); 537 PetscHeaderDestroy(A); 538 return 0; 539 } 540 541 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 544 if (op == ROW_ORIENTED) a->roworiented = 1; 545 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 546 else if (op == COLUMNS_SORTED) a->sorted = 1; 547 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 548 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 549 else if (op == ROWS_SORTED || 550 op == SYMMETRIC_MATRIX || 551 op == STRUCTURALLY_SYMMETRIC_MATRIX || 552 op == YES_NEW_DIAGONALS) 553 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 554 else if (op == NO_NEW_DIAGONALS) 555 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 556 else 557 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 558 return 0; 559 } 560 561 562 /* -------------------------------------------------------*/ 563 /* Should check that shapes of vectors and matrices match */ 564 /* -------------------------------------------------------*/ 565 #include "pinclude/plapack.h" 566 567 static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 568 { 569 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 570 Scalar *xg,*y,*zg; 571 register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5; 572 register Scalar x1,x2,x3,x4,x5; 573 int mbs=a->mbs,m=a->m,i,*idx,*ii; 574 int bs=a->bs,j,n,bs2=bs*bs,ierr; 575 576 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 577 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 578 579 if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */ 580 else if (zz!=yy){ 581 ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 582 PetscMemcpy(z,y,m*sizeof(Scalar)); 583 } 584 585 idx = a->j; 586 v = a->a; 587 ii = a->i; 588 589 switch (bs) { 590 case 1: 591 for ( i=0; i<mbs; i++ ) { 592 n = ii[1] - ii[0]; ii++; 593 sum = 0.0; 594 while (n--) sum += *v++ * x[*idx++]; 595 z[i] += sum; 596 } 597 break; 598 case 2: 599 for ( i=0; i<mbs; i++ ) { 600 n = ii[1] - ii[0]; ii++; 601 sum1 = 0.0; sum2 = 0.0; 602 for ( j=0; j<n; j++ ) { 603 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 604 sum1 += v[0]*x1 + v[2]*x2; 605 sum2 += v[1]*x1 + v[3]*x2; 606 v += 4; 607 } 608 z[0] += sum1; z[1] += sum2; 609 z += 2; 610 } 611 break; 612 case 3: 613 for ( i=0; i<mbs; i++ ) { 614 n = ii[1] - ii[0]; ii++; 615 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 616 for ( j=0; j<n; j++ ) { 617 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 618 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 619 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 620 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 621 v += 9; 622 } 623 z[0] += sum1; z[1] += sum2; z[2] += sum3; 624 z += 3; 625 } 626 break; 627 case 4: 628 for ( i=0; i<mbs; i++ ) { 629 n = ii[1] - ii[0]; ii++; 630 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 631 for ( j=0; j<n; j++ ) { 632 xb = x + 4*(*idx++); 633 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 634 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 635 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 636 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 637 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 638 v += 16; 639 } 640 z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; 641 z += 4; 642 } 643 break; 644 case 5: 645 for ( i=0; i<mbs; i++ ) { 646 n = ii[1] - ii[0]; ii++; 647 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 648 for ( j=0; j<n; j++ ) { 649 xb = x + 5*(*idx++); 650 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 651 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5; 659 z += 5; 660 } 661 break; 662 /* block sizes larger then 5 by 5 are handled by BLAS */ 663 default: { 664 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 665 if (!a->mult_work) { 666 k = max(a->m,a->n); 667 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 668 CHKPTRQ(a->mult_work); 669 } 670 work = a->mult_work; 671 for ( i=0; i<mbs; i++ ) { 672 n = ii[1] - ii[0]; ii++; 673 ncols = n*bs; 674 workt = work; 675 for ( j=0; j<n; j++ ) { 676 xb = x + bs*(*idx++); 677 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 678 workt += bs; 679 } 680 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 681 v += n*bs2; 682 z += bs; 683 } 684 } 685 } 686 return 0; 687 } 688 689 static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy) 690 { 691 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 692 int ierr,bs=a->bs,bs2=bs*bs; 693 694 ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 695 PLogFlops(2*bs2*(a->nz)-a->m); 696 return 0; 697 } 698 699 static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 700 { 701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 702 int ierr,bs=a->bs,bs2=bs*bs; 703 704 ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 705 PLogFlops(2*bs2*(a->nz)); 706 return 0; 707 } 708 709 static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 710 { 711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 712 Scalar *xg,*y,*zg,*zb; 713 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 714 int mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval; 715 int bs=a->bs,j,n,bs2=bs*bs,*ib,ierr; 716 717 718 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 719 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 720 721 if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */ 722 else if (zz!=yy){ 723 ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 724 PetscMemcpy(z,y,N*sizeof(Scalar)); 725 } 726 727 idx = a->j; 728 v = a->a; 729 ii = a->i; 730 731 switch (bs) { 732 case 1: 733 for ( i=0; i<mbs; i++ ) { 734 n = ii[1] - ii[0]; ii++; 735 xb = x + i; x1 = xb[0]; 736 ib = idx + ai[i]; 737 for ( j=0; j<n; j++ ) { 738 z[ib[j]] += *v++ * x1; 739 } 740 } 741 break; 742 case 2: 743 for ( i=0; i<mbs; i++ ) { 744 n = ii[1] - ii[0]; ii++; 745 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 746 ib = idx + ai[i]; 747 for ( j=0; j<n; j++ ) { 748 rval = ib[j]*2; 749 z[rval++] += v[0]*x1 + v[1]*x2; 750 z[rval++] += v[2]*x1 + v[3]*x2; 751 v += 4; 752 } 753 } 754 break; 755 case 3: 756 for ( i=0; i<mbs; i++ ) { 757 n = ii[1] - ii[0]; ii++; 758 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 759 ib = idx + ai[i]; 760 for ( j=0; j<n; j++ ) { 761 rval = ib[j]*3; 762 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 763 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 764 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 765 v += 9; 766 } 767 } 768 break; 769 case 4: 770 for ( i=0; i<mbs; i++ ) { 771 n = ii[1] - ii[0]; ii++; 772 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 773 ib = idx + ai[i]; 774 for ( j=0; j<n; j++ ) { 775 rval = ib[j]*4; 776 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 777 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 778 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 779 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 780 v += 16; 781 } 782 } 783 break; 784 case 5: 785 for ( i=0; i<mbs; i++ ) { 786 n = ii[1] - ii[0]; ii++; 787 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 788 x4 = xb[3]; x5 = xb[4]; 789 ib = idx + ai[i]; 790 for ( j=0; j<n; j++ ) { 791 rval = ib[j]*5; 792 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 793 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 794 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 795 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 796 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 797 v += 25; 798 } 799 } 800 break; 801 /* block sizes larger then 5 by 5 are handled by BLAS */ 802 default: { 803 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 804 if (!a->mult_work) { 805 k = max(a->m,a->n); 806 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 807 CHKPTRQ(a->mult_work); 808 } 809 work = a->mult_work; 810 for ( i=0; i<mbs; i++ ) { 811 n = ii[1] - ii[0]; ii++; 812 ncols = n*bs; 813 PetscMemzero(work,ncols*sizeof(Scalar)); 814 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 815 v += n*bs2; 816 x += bs; 817 workt = work; 818 for ( j=0; j<n; j++ ) { 819 zb = z + bs*(*idx++); 820 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 821 workt += bs; 822 } 823 } 824 } 825 } 826 PLogFlops(2*bs2*(a->nz)); 827 return 0; 828 } 829 830 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy) 831 { 832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 833 int ierr,bs=a->bs,bs2=bs*bs; 834 835 ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 836 PLogFlops(2*bs2*(a->nz)-a->n); 837 return 0; 838 } 839 840 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 841 { 842 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 843 int ierr,bs=a->bs,bs2=bs*bs; 844 845 ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 846 PLogFlops(2*bs2*(a->nz)); 847 return 0; 848 } 849 850 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 851 { 852 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 853 if (nz) *nz = a->bs*a->bs*a->nz; 854 if (nza) *nza = a->maxnz; 855 if (mem) *mem = (int)A->mem; 856 return 0; 857 } 858 859 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 860 { 861 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 862 863 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 864 865 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 866 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 867 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 868 *flg = PETSC_FALSE; return 0; 869 } 870 871 /* if the a->i are the same */ 872 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 873 *flg = PETSC_FALSE; return 0; 874 } 875 876 /* if a->j are the same */ 877 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 878 *flg = PETSC_FALSE; return 0; 879 } 880 881 /* if a->a are the same */ 882 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 883 *flg = PETSC_FALSE; return 0; 884 } 885 *flg = PETSC_TRUE; 886 return 0; 887 888 } 889 890 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 891 { 892 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 893 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 894 Scalar *x, zero = 0.0,*aa,*aa_j; 895 896 bs = a->bs; 897 aa = a->a; 898 ai = a->i; 899 aj = a->j; 900 ambs = a->mbs; 901 bs2 = bs*bs; 902 903 VecSet(&zero,v); 904 VecGetArray(v,&x); VecGetLocalSize(v,&n); 905 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 906 for ( i=0; i<ambs; i++ ) { 907 for ( j=ai[i]; j<ai[i+1]; j++ ) { 908 if (aj[j] == i) { 909 row = i*bs; 910 aa_j = aa+j*bs2; 911 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 912 break; 913 } 914 } 915 } 916 return 0; 917 } 918 919 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 920 { 921 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 922 Scalar *l,*r,x,*v,*aa,*li,*ri; 923 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 924 925 ai = a->i; 926 aj = a->j; 927 aa = a->a; 928 m = a->m; 929 n = a->n; 930 bs = a->bs; 931 mbs = a->mbs; 932 bs2 = bs*bs; 933 if (ll) { 934 VecGetArray(ll,&l); VecGetSize(ll,&lm); 935 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 936 for ( i=0; i<mbs; i++ ) { /* for each block row */ 937 M = ai[i+1] - ai[i]; 938 li = l + i*bs; 939 v = aa + bs2*ai[i]; 940 for ( j=0; j<M; j++ ) { /* for each block */ 941 for ( k=0; k<bs2; k++ ) { 942 (*v++) *= li[k%bs]; 943 } 944 } 945 } 946 } 947 948 if (rr) { 949 VecGetArray(rr,&r); VecGetSize(rr,&rn); 950 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 951 for ( i=0; i<mbs; i++ ) { /* for each block row */ 952 M = ai[i+1] - ai[i]; 953 v = aa + bs2*ai[i]; 954 for ( j=0; j<M; j++ ) { /* for each block */ 955 ri = r + bs*aj[ai[i]+j]; 956 for ( k=0; k<bs; k++ ) { 957 x = ri[k]; 958 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 959 } 960 } 961 } 962 } 963 return 0; 964 } 965 966 967 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 968 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 969 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 970 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 971 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 972 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 973 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 974 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 975 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 976 977 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 978 { 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 980 Scalar *v = a->a; 981 double sum = 0.0; 982 int i; 983 984 if (type == NORM_FROBENIUS) { 985 for (i=0; i<a->nz; i++ ) { 986 #if defined(PETSC_COMPLEX) 987 sum += real(conj(*v)*(*v)); v++; 988 #else 989 sum += (*v)*(*v); v++; 990 #endif 991 } 992 *norm = sqrt(sum); 993 } 994 else { 995 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 996 } 997 return 0; 998 } 999 1000 /* 1001 note: This can only work for identity for row and col. It would 1002 be good to check this and otherwise generate an error. 1003 */ 1004 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1005 { 1006 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1007 Mat outA; 1008 int ierr; 1009 1010 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1011 1012 outA = inA; 1013 inA->factor = FACTOR_LU; 1014 a->row = row; 1015 a->col = col; 1016 1017 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1018 1019 if (!a->diag) { 1020 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1021 } 1022 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1023 return 0; 1024 } 1025 1026 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1027 { 1028 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1029 int one = 1, totalnz = a->bs*a->bs*a->nz; 1030 BLscal_( &totalnz, alpha, a->a, &one ); 1031 PLogFlops(totalnz); 1032 return 0; 1033 } 1034 1035 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1036 { 1037 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1038 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1039 int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 1040 int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 1041 Scalar *ap, *aa = a->a, zero = 0.0; 1042 1043 for ( k=0; k<m; k++ ) { /* loop over rows */ 1044 row = im[k]; brow = row/bs; 1045 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1046 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1047 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 1048 nrow = ailen[brow]; 1049 for ( l=0; l<n; l++ ) { /* loop over columns */ 1050 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1051 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1052 col = in[l] - shift; 1053 bcol = col/bs; 1054 cidx = col%bs; 1055 ridx = row%bs; 1056 high = nrow; 1057 low = 0; /* assume unsorted */ 1058 while (high-low > 5) { 1059 t = (low+high)/2; 1060 if (rp[t] > bcol) high = t; 1061 else low = t; 1062 } 1063 for ( i=low; i<high; i++ ) { 1064 if (rp[i] > bcol) break; 1065 if (rp[i] == bcol) { 1066 *v++ = ap[bs2*i+bs*cidx+ridx]; 1067 goto finished; 1068 } 1069 } 1070 *v++ = zero; 1071 finished:; 1072 } 1073 } 1074 return 0; 1075 } 1076 1077 int MatPrintHelp_SeqBAIJ(Mat A) 1078 { 1079 static int called = 0; 1080 1081 if (called) return 0; else called = 1; 1082 return 0; 1083 } 1084 /* -------------------------------------------------------------------*/ 1085 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1086 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1087 MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ, 1088 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1089 MatSolve_SeqBAIJ,0, 1090 0,0, 1091 MatLUFactor_SeqBAIJ,0, 1092 0, 1093 MatTranspose_SeqBAIJ, 1094 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1095 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1096 0,MatAssemblyEnd_SeqBAIJ, 1097 0, 1098 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1099 MatGetReordering_SeqBAIJ, 1100 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1101 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1102 MatILUFactorSymbolic_SeqBAIJ,0, 1103 0,0,/* MatConvert_SeqBAIJ */ 0, 1104 0,0, 1105 MatConvertSameType_SeqBAIJ,0,0, 1106 MatILUFactor_SeqBAIJ,0,0, 1107 0,0, 1108 MatGetValues_SeqBAIJ,0, 1109 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1110 0}; 1111 1112 /*@C 1113 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1114 compressed row) format. For good matrix assembly performance the 1115 user should preallocate the matrix storage by setting the parameter nz 1116 (or nzz). By setting these parameters accurately, performance can be 1117 increased by more than a factor of 50. 1118 1119 Input Parameters: 1120 . comm - MPI communicator, set to MPI_COMM_SELF 1121 . bs - size of block 1122 . m - number of rows 1123 . n - number of columns 1124 . nz - number of block nonzeros per block row (same for all rows) 1125 . nzz - number of block nonzeros per block row or PETSC_NULL 1126 (possibly different for each row) 1127 1128 Output Parameter: 1129 . A - the matrix 1130 1131 Notes: 1132 The block AIJ format is fully compatible with standard Fortran 77 1133 storage. That is, the stored row and column indices can begin at 1134 either one (as in Fortran) or zero. See the users' manual for details. 1135 1136 Specify the preallocated storage with either nz or nnz (not both). 1137 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1138 allocation. For additional details, see the users manual chapter on 1139 matrices and the file $(PETSC_DIR)/Performance. 1140 1141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1142 @*/ 1143 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1144 { 1145 Mat B; 1146 Mat_SeqBAIJ *b; 1147 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1148 1149 if (mbs*bs!=m || nbs*bs!=n) 1150 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1151 1152 *A = 0; 1153 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1154 PLogObjectCreate(B); 1155 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1156 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1157 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1158 switch (bs) { 1159 case 1: 1160 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1161 break; 1162 case 2: 1163 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1164 break; 1165 case 3: 1166 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1167 break; 1168 case 4: 1169 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1170 break; 1171 case 5: 1172 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1173 break; 1174 } 1175 B->destroy = MatDestroy_SeqBAIJ; 1176 B->view = MatView_SeqBAIJ; 1177 B->factor = 0; 1178 B->lupivotthreshold = 1.0; 1179 b->row = 0; 1180 b->col = 0; 1181 b->reallocs = 0; 1182 1183 b->m = m; B->m = m; B->M = m; 1184 b->n = n; B->n = n; B->N = n; 1185 b->mbs = mbs; 1186 b->nbs = nbs; 1187 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1188 if (nnz == PETSC_NULL) { 1189 if (nz == PETSC_DEFAULT) nz = 5; 1190 else if (nz <= 0) nz = 1; 1191 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1192 nz = nz*mbs; 1193 } 1194 else { 1195 nz = 0; 1196 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1197 } 1198 1199 /* allocate the matrix space */ 1200 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1201 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1202 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1203 b->j = (int *) (b->a + nz*bs2); 1204 PetscMemzero(b->j,nz*sizeof(int)); 1205 b->i = b->j + nz; 1206 b->singlemalloc = 1; 1207 1208 b->i[0] = 0; 1209 for (i=1; i<mbs+1; i++) { 1210 b->i[i] = b->i[i-1] + b->imax[i-1]; 1211 } 1212 1213 /* b->ilen will count nonzeros in each block row so far. */ 1214 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1215 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1216 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1217 1218 b->bs = bs; 1219 b->mbs = mbs; 1220 b->nz = 0; 1221 b->maxnz = nz; 1222 b->sorted = 0; 1223 b->roworiented = 1; 1224 b->nonew = 0; 1225 b->diag = 0; 1226 b->solve_work = 0; 1227 b->mult_work = 0; 1228 b->spptr = 0; 1229 b->indexshift = 0; 1230 *A = B; 1231 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1232 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1233 return 0; 1234 } 1235 1236 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1237 { 1238 Mat C; 1239 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1240 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 1241 1242 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1243 1244 *B = 0; 1245 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1246 PLogObjectCreate(C); 1247 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1248 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1249 C->destroy = MatDestroy_SeqBAIJ; 1250 C->view = MatView_SeqBAIJ; 1251 C->factor = A->factor; 1252 c->row = 0; 1253 c->col = 0; 1254 C->assembled = PETSC_TRUE; 1255 1256 c->m = C->m = a->m; 1257 c->n = C->n = a->n; 1258 C->M = a->m; 1259 C->N = a->n; 1260 1261 c->bs = a->bs; 1262 c->mbs = a->mbs; 1263 c->nbs = a->nbs; 1264 c->indexshift = 0; 1265 1266 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1267 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1268 for ( i=0; i<mbs; i++ ) { 1269 c->imax[i] = a->imax[i]; 1270 c->ilen[i] = a->ilen[i]; 1271 } 1272 1273 /* allocate the matrix space */ 1274 c->singlemalloc = 1; 1275 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1276 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1277 c->j = (int *) (c->a + nz*bs2); 1278 c->i = c->j + nz; 1279 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1280 if (mbs > 0) { 1281 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1282 if (cpvalues == COPY_VALUES) { 1283 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1284 } 1285 } 1286 1287 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1288 c->sorted = a->sorted; 1289 c->roworiented = a->roworiented; 1290 c->nonew = a->nonew; 1291 1292 if (a->diag) { 1293 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1294 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1295 for ( i=0; i<mbs; i++ ) { 1296 c->diag[i] = a->diag[i]; 1297 } 1298 } 1299 else c->diag = 0; 1300 c->nz = a->nz; 1301 c->maxnz = a->maxnz; 1302 c->solve_work = 0; 1303 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1304 c->mult_work = 0; 1305 *B = C; 1306 return 0; 1307 } 1308 1309 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1310 { 1311 Mat_SeqBAIJ *a; 1312 Mat B; 1313 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1314 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1315 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1316 int *masked, nmask,tmp,ishift,bs2; 1317 Scalar *aa; 1318 MPI_Comm comm = ((PetscObject) viewer)->comm; 1319 1320 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1321 bs2 = bs*bs; 1322 1323 MPI_Comm_size(comm,&size); 1324 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1325 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1326 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1327 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1328 M = header[1]; N = header[2]; nz = header[3]; 1329 1330 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1331 1332 /* 1333 This code adds extra rows to make sure the number of rows is 1334 divisible by the blocksize 1335 */ 1336 mbs = M/bs; 1337 extra_rows = bs - M + bs*(mbs); 1338 if (extra_rows == bs) extra_rows = 0; 1339 else mbs++; 1340 if (extra_rows) { 1341 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1342 } 1343 1344 /* read in row lengths */ 1345 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1346 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1347 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1348 1349 /* read in column indices */ 1350 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1351 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1352 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1353 1354 /* loop over row lengths determining block row lengths */ 1355 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1356 PetscMemzero(browlengths,mbs*sizeof(int)); 1357 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1358 PetscMemzero(mask,mbs*sizeof(int)); 1359 masked = mask + mbs; 1360 rowcount = 0; nzcount = 0; 1361 for ( i=0; i<mbs; i++ ) { 1362 nmask = 0; 1363 for ( j=0; j<bs; j++ ) { 1364 kmax = rowlengths[rowcount]; 1365 for ( k=0; k<kmax; k++ ) { 1366 tmp = jj[nzcount++]/bs; 1367 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1368 } 1369 rowcount++; 1370 } 1371 browlengths[i] += nmask; 1372 /* zero out the mask elements we set */ 1373 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1374 } 1375 1376 /* create our matrix */ 1377 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1378 CHKERRQ(ierr); 1379 B = *A; 1380 a = (Mat_SeqBAIJ *) B->data; 1381 1382 /* set matrix "i" values */ 1383 a->i[0] = 0; 1384 for ( i=1; i<= mbs; i++ ) { 1385 a->i[i] = a->i[i-1] + browlengths[i-1]; 1386 a->ilen[i-1] = browlengths[i-1]; 1387 } 1388 a->indexshift = 0; 1389 a->nz = 0; 1390 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1391 1392 /* read in nonzero values */ 1393 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1394 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1395 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1396 1397 /* set "a" and "j" values into matrix */ 1398 nzcount = 0; jcount = 0; 1399 for ( i=0; i<mbs; i++ ) { 1400 nzcountb = nzcount; 1401 nmask = 0; 1402 for ( j=0; j<bs; j++ ) { 1403 kmax = rowlengths[i*bs+j]; 1404 for ( k=0; k<kmax; k++ ) { 1405 tmp = jj[nzcount++]/bs; 1406 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1407 } 1408 rowcount++; 1409 } 1410 /* sort the masked values */ 1411 PetscSortInt(nmask,masked); 1412 1413 /* set "j" values into matrix */ 1414 maskcount = 1; 1415 for ( j=0; j<nmask; j++ ) { 1416 a->j[jcount++] = masked[j]; 1417 mask[masked[j]] = maskcount++; 1418 } 1419 /* set "a" values into matrix */ 1420 ishift = bs2*a->i[i]; 1421 for ( j=0; j<bs; j++ ) { 1422 kmax = rowlengths[i*bs+j]; 1423 for ( k=0; k<kmax; k++ ) { 1424 tmp = jj[nzcountb]/bs ; 1425 block = mask[tmp] - 1; 1426 point = jj[nzcountb] - bs*tmp; 1427 idx = ishift + bs2*block + j + bs*point; 1428 a->a[idx] = aa[nzcountb++]; 1429 } 1430 } 1431 /* zero out the mask elements we set */ 1432 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1433 } 1434 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1435 1436 PetscFree(rowlengths); 1437 PetscFree(browlengths); 1438 PetscFree(aa); 1439 PetscFree(jj); 1440 PetscFree(mask); 1441 1442 B->assembled = PETSC_TRUE; 1443 1444 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1445 if (flg) { 1446 Viewer tviewer; 1447 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1448 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1449 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1450 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1451 } 1452 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1453 if (flg) { 1454 Viewer tviewer; 1455 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1456 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1457 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1458 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1459 } 1460 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1461 if (flg) { 1462 Viewer tviewer; 1463 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1464 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1465 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1466 } 1467 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1468 if (flg) { 1469 Viewer tviewer; 1470 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1471 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1472 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1473 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1474 } 1475 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1476 if (flg) { 1477 Viewer tviewer; 1478 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1479 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1480 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1481 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1482 } 1483 return 0; 1484 } 1485 1486 1487 1488