1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.33 1996/04/09 16:17:21 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 #define max(a,b) (a>b ? a:b) 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 166 fprintf(fd," block size is %d\n",bs); 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else if (format == ASCII_FORMAT_COMMON) { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else { 205 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 206 } 207 #else 208 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 209 #endif 210 } 211 } 212 fprintf(fd,"\n"); 213 } 214 } 215 } 216 fflush(fd); 217 return 0; 218 } 219 220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 221 { 222 Mat A = (Mat) obj; 223 ViewerType vtype; 224 int ierr; 225 226 if (!viewer) { 227 viewer = STDOUT_VIEWER_SELF; 228 } 229 230 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 231 if (vtype == MATLAB_VIEWER) { 232 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 233 } 234 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 235 return MatView_SeqBAIJ_ASCII(A,viewer); 236 } 237 else if (vtype == BINARY_FILE_VIEWER) { 238 return MatView_SeqBAIJ_Binary(A,viewer); 239 } 240 else if (vtype == DRAW_VIEWER) { 241 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 242 } 243 return 0; 244 } 245 246 #define CHUNKSIZE 10 247 248 /* This version has row oriented v */ 249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 250 { 251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 252 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 253 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 254 int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 255 int ridx,cidx,bs2=bs*bs; 256 Scalar *ap,value,*aa=a->a,*bap; 257 258 for ( k=0; k<m; k++ ) { /* loop over added rows */ 259 row = im[k]; brow = row/bs; 260 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 261 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 262 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 263 rmax = imax[brow]; nrow = ailen[brow]; 264 low = 0; 265 for ( l=0; l<n; l++ ) { /* loop over added columns */ 266 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 267 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 268 col = in[l] - shift; bcol = col/bs; 269 ridx = row % bs; cidx = col % bs; 270 if (roworiented) { 271 value = *v++; 272 } 273 else { 274 value = v[k + l*m]; 275 } 276 if (!sorted) low = 0; high = nrow; 277 while (high-low > 5) { 278 t = (low+high)/2; 279 if (rp[t] > bcol) high = t; 280 else low = t; 281 } 282 for ( i=low; i<high; i++ ) { 283 if (rp[i] > bcol) break; 284 if (rp[i] == bcol) { 285 bap = ap + bs2*i + bs*cidx + ridx; 286 if (is == ADD_VALUES) *bap += value; 287 else *bap = value; 288 goto noinsert; 289 } 290 } 291 if (nonew) goto noinsert; 292 if (nrow >= rmax) { 293 /* there is no extra room in row, therefore enlarge */ 294 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 295 Scalar *new_a; 296 297 /* malloc new storage space */ 298 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 299 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 300 new_j = (int *) (new_a + bs2*new_nz); 301 new_i = new_j + new_nz; 302 303 /* copy over old data into new slots */ 304 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 305 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 306 PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 307 len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 308 PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 309 len*sizeof(int)); 310 PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 311 PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 312 PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 313 aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 314 /* free up old matrix storage */ 315 PetscFree(a->a); 316 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 317 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 318 a->singlemalloc = 1; 319 320 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 321 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 322 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 323 a->maxnz += CHUNKSIZE; 324 a->reallocs++; 325 a->nz++; 326 } 327 N = nrow++ - 1; 328 /* shift up all the later entries in this row */ 329 for ( ii=N; ii>=i; ii-- ) { 330 rp[ii+1] = rp[ii]; 331 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 332 } 333 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 334 rp[i] = bcol; 335 ap[bs2*i + bs*cidx + ridx] = value; 336 noinsert:; 337 low = i; 338 } 339 ailen[brow] = nrow; 340 } 341 return 0; 342 } 343 344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 345 { 346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 347 *m = a->m; *n = a->n; 348 return 0; 349 } 350 351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 352 { 353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 354 *m = 0; *n = a->m; 355 return 0; 356 } 357 358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 359 { 360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 361 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 362 Scalar *aa,*v_i,*aa_i; 363 364 bs = a->bs; 365 ai = a->i; 366 aj = a->j; 367 aa = a->a; 368 bs2 = bs*bs; 369 370 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 371 372 bn = row/bs; /* Block number */ 373 bp = row % bs; /* Block Position */ 374 M = ai[bn+1] - ai[bn]; 375 *nz = bs*M; 376 377 if (v) { 378 *v = 0; 379 if (*nz) { 380 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 381 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 382 v_i = *v + i*bs; 383 aa_i = aa + bs2*(ai[bn] + i); 384 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 385 } 386 } 387 } 388 389 if (idx) { 390 *idx = 0; 391 if (*nz) { 392 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 393 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 394 idx_i = *idx + i*bs; 395 itmp = bs*aj[ai[bn] + i]; 396 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 397 } 398 } 399 } 400 return 0; 401 } 402 403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 404 { 405 if (idx) {if (*idx) PetscFree(*idx);} 406 if (v) {if (*v) PetscFree(*v);} 407 return 0; 408 } 409 410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 411 { 412 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 413 Mat C; 414 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 415 int shift=a->indexshift,*rows,*cols,bs2=bs*bs; 416 Scalar *array=a->a; 417 418 if (B==PETSC_NULL && mbs!=nbs) 419 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 420 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 421 PetscMemzero(col,(1+nbs)*sizeof(int)); 422 if (shift) { 423 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1; 424 } 425 for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1; 426 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 427 PetscFree(col); 428 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 429 cols = rows + bs; 430 for ( i=0; i<mbs; i++ ) { 431 cols[0] = i*bs; 432 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 433 len = ai[i+1] - ai[i]; 434 for ( j=0; j<len; j++ ) { 435 rows[0] = (*aj++)*bs; 436 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 437 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 438 array += bs2; 439 } 440 } 441 PetscFree(rows); 442 if (shift) { 443 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1; 444 } 445 446 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 447 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 448 449 if (B != PETSC_NULL) { 450 *B = C; 451 } else { 452 /* This isn't really an in-place transpose */ 453 PetscFree(a->a); 454 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 455 if (a->diag) PetscFree(a->diag); 456 if (a->ilen) PetscFree(a->ilen); 457 if (a->imax) PetscFree(a->imax); 458 if (a->solve_work) PetscFree(a->solve_work); 459 PetscFree(a); 460 PetscMemcpy(A,C,sizeof(struct _Mat)); 461 PetscHeaderDestroy(C); 462 } 463 return 0; 464 } 465 466 467 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 468 { 469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 470 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 471 int m = a->m,*ip, N, *ailen = a->ilen,shift = a->indexshift; 472 int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 473 Scalar *aa = a->a, *ap; 474 475 if (mode == FLUSH_ASSEMBLY) return 0; 476 477 for ( i=1; i<mbs; i++ ) { 478 /* move each row back by the amount of empty slots (fshift) before it*/ 479 fshift += imax[i-1] - ailen[i-1]; 480 if (fshift) { 481 ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 482 N = ailen[i]; 483 for ( j=0; j<N; j++ ) { 484 ip[j-fshift] = ip[j]; 485 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 486 } 487 } 488 ai[i] = ai[i-1] + ailen[i-1]; 489 } 490 if (mbs) { 491 fshift += imax[mbs-1] - ailen[mbs-1]; 492 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 493 } 494 /* reset ilen and imax for each row */ 495 for ( i=0; i<mbs; i++ ) { 496 ailen[i] = imax[i] = ai[i+1] - ai[i]; 497 } 498 a->nz = ai[mbs] + shift; 499 500 /* diagonals may have moved, so kill the diagonal pointers */ 501 if (fshift && a->diag) { 502 PetscFree(a->diag); 503 PLogObjectMemory(A,-(m+1)*sizeof(int)); 504 a->diag = 0; 505 } 506 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 507 fshift,a->nz,m); 508 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 509 a->reallocs); 510 return 0; 511 } 512 513 static int MatZeroEntries_SeqBAIJ(Mat A) 514 { 515 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 516 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 517 return 0; 518 } 519 520 int MatDestroy_SeqBAIJ(PetscObject obj) 521 { 522 Mat A = (Mat) obj; 523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 524 525 #if defined(PETSC_LOG) 526 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 527 #endif 528 PetscFree(a->a); 529 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 530 if (a->diag) PetscFree(a->diag); 531 if (a->ilen) PetscFree(a->ilen); 532 if (a->imax) PetscFree(a->imax); 533 if (a->solve_work) PetscFree(a->solve_work); 534 if (a->mult_work) PetscFree(a->mult_work); 535 PetscFree(a); 536 PLogObjectDestroy(A); 537 PetscHeaderDestroy(A); 538 return 0; 539 } 540 541 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 544 if (op == ROW_ORIENTED) a->roworiented = 1; 545 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 546 else if (op == COLUMNS_SORTED) a->sorted = 1; 547 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 548 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 549 else if (op == ROWS_SORTED || 550 op == SYMMETRIC_MATRIX || 551 op == STRUCTURALLY_SYMMETRIC_MATRIX || 552 op == YES_NEW_DIAGONALS) 553 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 554 else if (op == NO_NEW_DIAGONALS) 555 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 556 else 557 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 558 return 0; 559 } 560 561 562 /* -------------------------------------------------------*/ 563 /* Should check that shapes of vectors and matrices match */ 564 /* -------------------------------------------------------*/ 565 #include "pinclude/plapack.h" 566 567 static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 568 { 569 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 570 Scalar *xg,*y,*zg; 571 register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5; 572 register Scalar x1,x2,x3,x4,x5; 573 int mbs=a->mbs,m=a->m,i,*idx,*ii; 574 int bs=a->bs,j,n,bs2=bs*bs,ierr; 575 576 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 577 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 578 579 if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */ 580 else if (zz!=yy){ 581 ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 582 PetscMemcpy(z,y,m*sizeof(Scalar)); 583 ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr); 584 } 585 586 idx = a->j; 587 v = a->a; 588 ii = a->i; 589 590 switch (bs) { 591 case 1: 592 for ( i=0; i<mbs; i++ ) { 593 n = ii[1] - ii[0]; ii++; 594 sum = 0.0; 595 while (n--) sum += *v++ * x[*idx++]; 596 z[i] += sum; 597 } 598 break; 599 case 2: 600 for ( i=0; i<mbs; i++ ) { 601 n = ii[1] - ii[0]; ii++; 602 sum1 = 0.0; sum2 = 0.0; 603 for ( j=0; j<n; j++ ) { 604 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 605 sum1 += v[0]*x1 + v[2]*x2; 606 sum2 += v[1]*x1 + v[3]*x2; 607 v += 4; 608 } 609 z[0] += sum1; z[1] += sum2; 610 z += 2; 611 } 612 break; 613 case 3: 614 for ( i=0; i<mbs; i++ ) { 615 n = ii[1] - ii[0]; ii++; 616 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 617 for ( j=0; j<n; j++ ) { 618 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 619 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 620 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 621 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 622 v += 9; 623 } 624 z[0] += sum1; z[1] += sum2; z[2] += sum3; 625 z += 3; 626 } 627 break; 628 case 4: 629 for ( i=0; i<mbs; i++ ) { 630 n = ii[1] - ii[0]; ii++; 631 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 632 for ( j=0; j<n; j++ ) { 633 xb = x + 4*(*idx++); 634 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 635 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 636 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 637 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 638 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 639 v += 16; 640 } 641 z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; 642 z += 4; 643 } 644 break; 645 case 5: 646 for ( i=0; i<mbs; i++ ) { 647 n = ii[1] - ii[0]; ii++; 648 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 649 for ( j=0; j<n; j++ ) { 650 xb = x + 5*(*idx++); 651 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 652 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 653 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 654 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 655 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 656 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 657 v += 25; 658 } 659 z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5; 660 z += 5; 661 } 662 break; 663 /* block sizes larger then 5 by 5 are handled by BLAS */ 664 default: { 665 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 666 if (!a->mult_work) { 667 k = max(a->m,a->n); 668 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 669 CHKPTRQ(a->mult_work); 670 } 671 work = a->mult_work; 672 for ( i=0; i<mbs; i++ ) { 673 n = ii[1] - ii[0]; ii++; 674 ncols = n*bs; 675 workt = work; 676 for ( j=0; j<n; j++ ) { 677 xb = x + bs*(*idx++); 678 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 679 workt += bs; 680 } 681 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 682 v += n*bs2; 683 z += bs; 684 } 685 } 686 } 687 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 688 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 689 return 0; 690 } 691 692 static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy) 693 { 694 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 695 int ierr,bs=a->bs,bs2=bs*bs; 696 697 ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 698 PLogFlops(2*bs2*(a->nz)-a->m); 699 return 0; 700 } 701 702 static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 703 { 704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 705 int ierr,bs=a->bs,bs2=bs*bs; 706 707 ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 708 PLogFlops(2*bs2*(a->nz)); 709 return 0; 710 } 711 712 static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 713 { 714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 715 Scalar *xg,*y,*zg,*zb; 716 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 717 int mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval; 718 int bs=a->bs,j,n,bs2=bs*bs,*ib,ierr; 719 720 721 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 722 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 723 724 if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */ 725 else if (zz!=yy){ 726 ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 727 PetscMemcpy(z,y,N*sizeof(Scalar)); 728 ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr); 729 } 730 731 idx = a->j; 732 v = a->a; 733 ii = a->i; 734 735 switch (bs) { 736 case 1: 737 for ( i=0; i<mbs; i++ ) { 738 n = ii[1] - ii[0]; ii++; 739 xb = x + i; x1 = xb[0]; 740 ib = idx + ai[i]; 741 for ( j=0; j<n; j++ ) { 742 z[ib[j]] += *v++ * x1; 743 } 744 } 745 break; 746 case 2: 747 for ( i=0; i<mbs; i++ ) { 748 n = ii[1] - ii[0]; ii++; 749 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 750 ib = idx + ai[i]; 751 for ( j=0; j<n; j++ ) { 752 rval = ib[j]*2; 753 z[rval++] += v[0]*x1 + v[1]*x2; 754 z[rval++] += v[2]*x1 + v[3]*x2; 755 v += 4; 756 } 757 } 758 break; 759 case 3: 760 for ( i=0; i<mbs; i++ ) { 761 n = ii[1] - ii[0]; ii++; 762 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 763 ib = idx + ai[i]; 764 for ( j=0; j<n; j++ ) { 765 rval = ib[j]*3; 766 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 767 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 768 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 769 v += 9; 770 } 771 } 772 break; 773 case 4: 774 for ( i=0; i<mbs; i++ ) { 775 n = ii[1] - ii[0]; ii++; 776 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 777 ib = idx + ai[i]; 778 for ( j=0; j<n; j++ ) { 779 rval = ib[j]*4; 780 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 781 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 782 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 783 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 784 v += 16; 785 } 786 } 787 break; 788 case 5: 789 for ( i=0; i<mbs; i++ ) { 790 n = ii[1] - ii[0]; ii++; 791 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 792 x4 = xb[3]; x5 = xb[4]; 793 ib = idx + ai[i]; 794 for ( j=0; j<n; j++ ) { 795 rval = ib[j]*5; 796 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 797 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 798 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 799 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 800 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 801 v += 25; 802 } 803 } 804 break; 805 /* block sizes larger then 5 by 5 are handled by BLAS */ 806 default: { 807 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 808 if (!a->mult_work) { 809 k = max(a->m,a->n); 810 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 811 CHKPTRQ(a->mult_work); 812 } 813 work = a->mult_work; 814 for ( i=0; i<mbs; i++ ) { 815 n = ii[1] - ii[0]; ii++; 816 ncols = n*bs; 817 PetscMemzero(work,ncols*sizeof(Scalar)); 818 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 819 v += n*bs2; 820 x += bs; 821 workt = work; 822 for ( j=0; j<n; j++ ) { 823 zb = z + bs*(*idx++); 824 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 825 workt += bs; 826 } 827 } 828 } 829 } 830 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 831 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 832 return 0; 833 } 834 835 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy) 836 { 837 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 838 int ierr,bs=a->bs,bs2=bs*bs; 839 840 ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 841 PLogFlops(2*bs2*(a->nz)-a->n); 842 return 0; 843 } 844 845 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 846 { 847 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 848 int ierr,bs=a->bs,bs2=bs*bs; 849 850 ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 851 PLogFlops(2*bs2*(a->nz)); 852 return 0; 853 } 854 855 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 856 { 857 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 858 if (nz) *nz = a->bs*a->bs*a->nz; 859 if (nza) *nza = a->maxnz; 860 if (mem) *mem = (int)A->mem; 861 return 0; 862 } 863 864 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 865 { 866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 867 868 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 869 870 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 871 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 872 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 873 *flg = PETSC_FALSE; return 0; 874 } 875 876 /* if the a->i are the same */ 877 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 878 *flg = PETSC_FALSE; return 0; 879 } 880 881 /* if a->j are the same */ 882 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 883 *flg = PETSC_FALSE; return 0; 884 } 885 886 /* if a->a are the same */ 887 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 888 *flg = PETSC_FALSE; return 0; 889 } 890 *flg = PETSC_TRUE; 891 return 0; 892 893 } 894 895 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 896 { 897 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 898 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 899 Scalar *x, zero = 0.0,*aa,*aa_j; 900 901 bs = a->bs; 902 aa = a->a; 903 ai = a->i; 904 aj = a->j; 905 ambs = a->mbs; 906 bs2 = bs*bs; 907 908 VecSet(&zero,v); 909 VecGetArray(v,&x); VecGetLocalSize(v,&n); 910 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 911 for ( i=0; i<ambs; i++ ) { 912 for ( j=ai[i]; j<ai[i+1]; j++ ) { 913 if (aj[j] == i) { 914 row = i*bs; 915 aa_j = aa+j*bs2; 916 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 917 break; 918 } 919 } 920 } 921 return 0; 922 } 923 924 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 925 { 926 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 927 Scalar *l,*r,x,*v,*aa,*li,*ri; 928 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 929 930 ai = a->i; 931 aj = a->j; 932 aa = a->a; 933 m = a->m; 934 n = a->n; 935 bs = a->bs; 936 mbs = a->mbs; 937 bs2 = bs*bs; 938 if (ll) { 939 VecGetArray(ll,&l); VecGetSize(ll,&lm); 940 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 941 for ( i=0; i<mbs; i++ ) { /* for each block row */ 942 M = ai[i+1] - ai[i]; 943 li = l + i*bs; 944 v = aa + bs2*ai[i]; 945 for ( j=0; j<M; j++ ) { /* for each block */ 946 for ( k=0; k<bs2; k++ ) { 947 (*v++) *= li[k%bs]; 948 } 949 } 950 } 951 } 952 953 if (rr) { 954 VecGetArray(rr,&r); VecGetSize(rr,&rn); 955 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 956 for ( i=0; i<mbs; i++ ) { /* for each block row */ 957 M = ai[i+1] - ai[i]; 958 v = aa + bs2*ai[i]; 959 for ( j=0; j<M; j++ ) { /* for each block */ 960 ri = r + bs*aj[ai[i]+j]; 961 for ( k=0; k<bs; k++ ) { 962 x = ri[k]; 963 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 964 } 965 } 966 } 967 } 968 return 0; 969 } 970 971 972 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 973 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 974 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 975 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 976 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 977 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 978 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 979 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 980 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 981 982 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 983 { 984 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 985 Scalar *v = a->a; 986 double sum = 0.0; 987 int i,bs=a->bs,nz=a->nz,bs2=bs*bs; 988 989 if (type == NORM_FROBENIUS) { 990 for (i=0; i< bs2*nz; i++ ) { 991 #if defined(PETSC_COMPLEX) 992 sum += real(conj(*v)*(*v)); v++; 993 #else 994 sum += (*v)*(*v); v++; 995 #endif 996 } 997 *norm = sqrt(sum); 998 } 999 else { 1000 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1001 } 1002 return 0; 1003 } 1004 1005 /* 1006 note: This can only work for identity for row and col. It would 1007 be good to check this and otherwise generate an error. 1008 */ 1009 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1010 { 1011 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1012 Mat outA; 1013 int ierr; 1014 1015 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1016 1017 outA = inA; 1018 inA->factor = FACTOR_LU; 1019 a->row = row; 1020 a->col = col; 1021 1022 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1023 1024 if (!a->diag) { 1025 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1026 } 1027 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1028 return 0; 1029 } 1030 1031 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1032 { 1033 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1034 int one = 1, totalnz = a->bs*a->bs*a->nz; 1035 BLscal_( &totalnz, alpha, a->a, &one ); 1036 PLogFlops(totalnz); 1037 return 0; 1038 } 1039 1040 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1041 { 1042 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1043 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1044 int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 1045 int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 1046 Scalar *ap, *aa = a->a, zero = 0.0; 1047 1048 for ( k=0; k<m; k++ ) { /* loop over rows */ 1049 row = im[k]; brow = row/bs; 1050 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1051 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1052 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 1053 nrow = ailen[brow]; 1054 for ( l=0; l<n; l++ ) { /* loop over columns */ 1055 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1056 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1057 col = in[l] - shift; 1058 bcol = col/bs; 1059 cidx = col%bs; 1060 ridx = row%bs; 1061 high = nrow; 1062 low = 0; /* assume unsorted */ 1063 while (high-low > 5) { 1064 t = (low+high)/2; 1065 if (rp[t] > bcol) high = t; 1066 else low = t; 1067 } 1068 for ( i=low; i<high; i++ ) { 1069 if (rp[i] > bcol) break; 1070 if (rp[i] == bcol) { 1071 *v++ = ap[bs2*i+bs*cidx+ridx]; 1072 goto finished; 1073 } 1074 } 1075 *v++ = zero; 1076 finished:; 1077 } 1078 } 1079 return 0; 1080 } 1081 1082 int MatPrintHelp_SeqBAIJ(Mat A) 1083 { 1084 static int called = 0; 1085 1086 if (called) return 0; else called = 1; 1087 return 0; 1088 } 1089 /* -------------------------------------------------------------------*/ 1090 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1091 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1092 MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ, 1093 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1094 MatSolve_SeqBAIJ,0, 1095 0,0, 1096 MatLUFactor_SeqBAIJ,0, 1097 0, 1098 MatTranspose_SeqBAIJ, 1099 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1100 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1101 0,MatAssemblyEnd_SeqBAIJ, 1102 0, 1103 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1104 MatGetReordering_SeqBAIJ, 1105 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1106 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1107 MatILUFactorSymbolic_SeqBAIJ,0, 1108 0,0,/* MatConvert_SeqBAIJ */ 0, 1109 0,0, 1110 MatConvertSameType_SeqBAIJ,0,0, 1111 MatILUFactor_SeqBAIJ,0,0, 1112 0,0, 1113 MatGetValues_SeqBAIJ,0, 1114 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1115 0}; 1116 1117 /*@C 1118 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1119 compressed row) format. For good matrix assembly performance the 1120 user should preallocate the matrix storage by setting the parameter nz 1121 (or nzz). By setting these parameters accurately, performance can be 1122 increased by more than a factor of 50. 1123 1124 Input Parameters: 1125 . comm - MPI communicator, set to MPI_COMM_SELF 1126 . bs - size of block 1127 . m - number of rows 1128 . n - number of columns 1129 . nz - number of block nonzeros per block row (same for all rows) 1130 . nzz - number of block nonzeros per block row or PETSC_NULL 1131 (possibly different for each row) 1132 1133 Output Parameter: 1134 . A - the matrix 1135 1136 Notes: 1137 The block AIJ format is fully compatible with standard Fortran 77 1138 storage. That is, the stored row and column indices can begin at 1139 either one (as in Fortran) or zero. See the users' manual for details. 1140 1141 Specify the preallocated storage with either nz or nnz (not both). 1142 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1143 allocation. For additional details, see the users manual chapter on 1144 matrices and the file $(PETSC_DIR)/Performance. 1145 1146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1147 @*/ 1148 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1149 { 1150 Mat B; 1151 Mat_SeqBAIJ *b; 1152 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1153 1154 if (mbs*bs!=m || nbs*bs!=n) 1155 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1156 1157 *A = 0; 1158 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1159 PLogObjectCreate(B); 1160 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1161 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1162 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1163 switch (bs) { 1164 case 1: 1165 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1166 break; 1167 case 2: 1168 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1169 break; 1170 case 3: 1171 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1172 break; 1173 case 4: 1174 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1175 break; 1176 case 5: 1177 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1178 break; 1179 } 1180 B->destroy = MatDestroy_SeqBAIJ; 1181 B->view = MatView_SeqBAIJ; 1182 B->factor = 0; 1183 B->lupivotthreshold = 1.0; 1184 b->row = 0; 1185 b->col = 0; 1186 b->reallocs = 0; 1187 1188 b->m = m; B->m = m; B->M = m; 1189 b->n = n; B->n = n; B->N = n; 1190 b->mbs = mbs; 1191 b->nbs = nbs; 1192 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1193 if (nnz == PETSC_NULL) { 1194 if (nz == PETSC_DEFAULT) nz = 5; 1195 else if (nz <= 0) nz = 1; 1196 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1197 nz = nz*mbs; 1198 } 1199 else { 1200 nz = 0; 1201 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1202 } 1203 1204 /* allocate the matrix space */ 1205 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1206 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1207 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1208 b->j = (int *) (b->a + nz*bs2); 1209 PetscMemzero(b->j,nz*sizeof(int)); 1210 b->i = b->j + nz; 1211 b->singlemalloc = 1; 1212 1213 b->i[0] = 0; 1214 for (i=1; i<mbs+1; i++) { 1215 b->i[i] = b->i[i-1] + b->imax[i-1]; 1216 } 1217 1218 /* b->ilen will count nonzeros in each block row so far. */ 1219 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1220 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1221 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1222 1223 b->bs = bs; 1224 b->mbs = mbs; 1225 b->nz = 0; 1226 b->maxnz = nz; 1227 b->sorted = 0; 1228 b->roworiented = 1; 1229 b->nonew = 0; 1230 b->diag = 0; 1231 b->solve_work = 0; 1232 b->mult_work = 0; 1233 b->spptr = 0; 1234 b->indexshift = 0; 1235 *A = B; 1236 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1237 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1238 return 0; 1239 } 1240 1241 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1242 { 1243 Mat C; 1244 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1245 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 1246 1247 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1248 1249 *B = 0; 1250 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1251 PLogObjectCreate(C); 1252 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1253 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1254 C->destroy = MatDestroy_SeqBAIJ; 1255 C->view = MatView_SeqBAIJ; 1256 C->factor = A->factor; 1257 c->row = 0; 1258 c->col = 0; 1259 C->assembled = PETSC_TRUE; 1260 1261 c->m = C->m = a->m; 1262 c->n = C->n = a->n; 1263 C->M = a->m; 1264 C->N = a->n; 1265 1266 c->bs = a->bs; 1267 c->mbs = a->mbs; 1268 c->nbs = a->nbs; 1269 c->indexshift = 0; 1270 1271 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1272 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1273 for ( i=0; i<mbs; i++ ) { 1274 c->imax[i] = a->imax[i]; 1275 c->ilen[i] = a->ilen[i]; 1276 } 1277 1278 /* allocate the matrix space */ 1279 c->singlemalloc = 1; 1280 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1281 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1282 c->j = (int *) (c->a + nz*bs2); 1283 c->i = c->j + nz; 1284 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1285 if (mbs > 0) { 1286 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1287 if (cpvalues == COPY_VALUES) { 1288 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1289 } 1290 } 1291 1292 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1293 c->sorted = a->sorted; 1294 c->roworiented = a->roworiented; 1295 c->nonew = a->nonew; 1296 1297 if (a->diag) { 1298 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1299 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1300 for ( i=0; i<mbs; i++ ) { 1301 c->diag[i] = a->diag[i]; 1302 } 1303 } 1304 else c->diag = 0; 1305 c->nz = a->nz; 1306 c->maxnz = a->maxnz; 1307 c->solve_work = 0; 1308 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1309 c->mult_work = 0; 1310 *B = C; 1311 return 0; 1312 } 1313 1314 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1315 { 1316 Mat_SeqBAIJ *a; 1317 Mat B; 1318 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1319 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1320 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1321 int *masked, nmask,tmp,ishift,bs2; 1322 Scalar *aa; 1323 MPI_Comm comm = ((PetscObject) viewer)->comm; 1324 1325 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1326 bs2 = bs*bs; 1327 1328 MPI_Comm_size(comm,&size); 1329 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1330 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1331 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1332 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1333 M = header[1]; N = header[2]; nz = header[3]; 1334 1335 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1336 1337 /* 1338 This code adds extra rows to make sure the number of rows is 1339 divisible by the blocksize 1340 */ 1341 mbs = M/bs; 1342 extra_rows = bs - M + bs*(mbs); 1343 if (extra_rows == bs) extra_rows = 0; 1344 else mbs++; 1345 if (extra_rows) { 1346 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1347 } 1348 1349 /* read in row lengths */ 1350 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1351 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1352 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1353 1354 /* read in column indices */ 1355 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1356 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1357 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1358 1359 /* loop over row lengths determining block row lengths */ 1360 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1361 PetscMemzero(browlengths,mbs*sizeof(int)); 1362 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1363 PetscMemzero(mask,mbs*sizeof(int)); 1364 masked = mask + mbs; 1365 rowcount = 0; nzcount = 0; 1366 for ( i=0; i<mbs; i++ ) { 1367 nmask = 0; 1368 for ( j=0; j<bs; j++ ) { 1369 kmax = rowlengths[rowcount]; 1370 for ( k=0; k<kmax; k++ ) { 1371 tmp = jj[nzcount++]/bs; 1372 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1373 } 1374 rowcount++; 1375 } 1376 browlengths[i] += nmask; 1377 /* zero out the mask elements we set */ 1378 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1379 } 1380 1381 /* create our matrix */ 1382 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1383 CHKERRQ(ierr); 1384 B = *A; 1385 a = (Mat_SeqBAIJ *) B->data; 1386 1387 /* set matrix "i" values */ 1388 a->i[0] = 0; 1389 for ( i=1; i<= mbs; i++ ) { 1390 a->i[i] = a->i[i-1] + browlengths[i-1]; 1391 a->ilen[i-1] = browlengths[i-1]; 1392 } 1393 a->indexshift = 0; 1394 a->nz = 0; 1395 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1396 1397 /* read in nonzero values */ 1398 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1399 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1400 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1401 1402 /* set "a" and "j" values into matrix */ 1403 nzcount = 0; jcount = 0; 1404 for ( i=0; i<mbs; i++ ) { 1405 nzcountb = nzcount; 1406 nmask = 0; 1407 for ( j=0; j<bs; j++ ) { 1408 kmax = rowlengths[i*bs+j]; 1409 for ( k=0; k<kmax; k++ ) { 1410 tmp = jj[nzcount++]/bs; 1411 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1412 } 1413 rowcount++; 1414 } 1415 /* sort the masked values */ 1416 PetscSortInt(nmask,masked); 1417 1418 /* set "j" values into matrix */ 1419 maskcount = 1; 1420 for ( j=0; j<nmask; j++ ) { 1421 a->j[jcount++] = masked[j]; 1422 mask[masked[j]] = maskcount++; 1423 } 1424 /* set "a" values into matrix */ 1425 ishift = bs2*a->i[i]; 1426 for ( j=0; j<bs; j++ ) { 1427 kmax = rowlengths[i*bs+j]; 1428 for ( k=0; k<kmax; k++ ) { 1429 tmp = jj[nzcountb]/bs ; 1430 block = mask[tmp] - 1; 1431 point = jj[nzcountb] - bs*tmp; 1432 idx = ishift + bs2*block + j + bs*point; 1433 a->a[idx] = aa[nzcountb++]; 1434 } 1435 } 1436 /* zero out the mask elements we set */ 1437 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1438 } 1439 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1440 1441 PetscFree(rowlengths); 1442 PetscFree(browlengths); 1443 PetscFree(aa); 1444 PetscFree(jj); 1445 PetscFree(mask); 1446 1447 B->assembled = PETSC_TRUE; 1448 1449 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1450 if (flg) { 1451 Viewer tviewer; 1452 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1453 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1454 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1455 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1456 } 1457 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1458 if (flg) { 1459 Viewer tviewer; 1460 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1461 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1462 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1463 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1464 } 1465 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1466 if (flg) { 1467 Viewer tviewer; 1468 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1469 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1470 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1471 } 1472 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1473 if (flg) { 1474 Viewer tviewer; 1475 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1476 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1477 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1478 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1479 } 1480 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1481 if (flg) { 1482 Viewer tviewer; 1483 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1484 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1485 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1486 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1487 } 1488 return 0; 1489 } 1490 1491 1492 1493