1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.27 1996/04/05 16:20:05 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = a->indexshift; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO) { 166 /* no need to print additional information */ ; 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181 } 182 else { 183 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 184 } 185 #else 186 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 187 #endif 188 } 189 } 190 fprintf(fd,"\n"); 191 } 192 } 193 } 194 fflush(fd); 195 return 0; 196 } 197 198 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 199 { 200 Mat A = (Mat) obj; 201 ViewerType vtype; 202 int ierr; 203 204 if (!viewer) { 205 viewer = STDOUT_VIEWER_SELF; 206 } 207 208 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 209 if (vtype == MATLAB_VIEWER) { 210 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 211 } 212 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 213 return MatView_SeqBAIJ_ASCII(A,viewer); 214 } 215 else if (vtype == BINARY_FILE_VIEWER) { 216 return MatView_SeqBAIJ_Binary(A,viewer); 217 } 218 else if (vtype == DRAW_VIEWER) { 219 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 220 } 221 return 0; 222 } 223 224 #define CHUNKSIZE 1 225 226 /* This version has row oriented v */ 227 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 230 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 231 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 232 int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 233 int ridx,cidx,bs2=bs*bs; 234 Scalar *ap,value,*aa=a->a,*bap; 235 236 for ( k=0; k<m; k++ ) { /* loop over added rows */ 237 row = im[k]; brow = row/bs; 238 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 239 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 240 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 241 rmax = imax[brow]; nrow = ailen[brow]; 242 low = 0; 243 for ( l=0; l<n; l++ ) { /* loop over added columns */ 244 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 245 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 246 col = in[l] - shift; bcol = col/bs; 247 ridx = row % bs; cidx = col % bs; 248 if (roworiented) { 249 value = *v++; 250 } 251 else { 252 value = v[k + l*m]; 253 } 254 if (!sorted) low = 0; high = nrow; 255 while (high-low > 5) { 256 t = (low+high)/2; 257 if (rp[t] > bcol) high = t; 258 else low = t; 259 } 260 for ( i=low; i<high; i++ ) { 261 if (rp[i] > bcol) break; 262 if (rp[i] == bcol) { 263 bap = ap + bs2*i + bs*cidx + ridx; 264 if (is == ADD_VALUES) *bap += value; 265 else *bap = value; 266 goto noinsert; 267 } 268 } 269 if (nonew) goto noinsert; 270 if (nrow >= rmax) { 271 /* there is no extra room in row, therefore enlarge */ 272 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 273 Scalar *new_a; 274 275 /* malloc new storage space */ 276 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 277 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 278 new_j = (int *) (new_a + bs2*new_nz); 279 new_i = new_j + new_nz; 280 281 /* copy over old data into new slots */ 282 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 283 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 284 PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 285 len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 286 PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 287 len*sizeof(int)); 288 PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 289 PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 290 PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 291 aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 292 /* free up old matrix storage */ 293 PetscFree(a->a); 294 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 295 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 296 a->singlemalloc = 1; 297 298 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 299 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 300 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 301 a->maxnz += bs2*CHUNKSIZE; 302 a->reallocs++; 303 a->nz+=bs2; 304 } 305 N = nrow++ - 1; 306 /* shift up all the later entries in this row */ 307 for ( ii=N; ii>=i; ii-- ) { 308 rp[ii+1] = rp[ii]; 309 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 310 } 311 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 312 rp[i] = bcol; 313 ap[bs2*i + bs*cidx + ridx] = value; 314 noinsert:; 315 low = i; 316 } 317 ailen[brow] = nrow; 318 } 319 return 0; 320 } 321 322 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 323 { 324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 325 *m = a->m; *n = a->n; 326 return 0; 327 } 328 329 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 332 *m = 0; *n = a->m; 333 return 0; 334 } 335 336 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 337 { 338 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 339 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 340 Scalar *aa,*v_i,*aa_i; 341 342 bs = a->bs; 343 ai = a->i; 344 aj = a->j; 345 aa = a->a; 346 bs2 = bs*bs; 347 348 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 349 350 bn = row/bs; /* Block number */ 351 bp = row % bs; /* Block Position */ 352 M = ai[bn+1] - ai[bn]; 353 *nz = bs*M; 354 355 if (v) { 356 *v = 0; 357 if (*nz) { 358 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 359 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 360 v_i = *v + i*bs; 361 aa_i = aa + bs2*(ai[bn] + i); 362 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 363 } 364 } 365 } 366 367 if (idx) { 368 *idx = 0; 369 if (*nz) { 370 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 371 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 372 idx_i = *idx + i*bs; 373 itmp = bs*aj[ai[bn] + i]; 374 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 375 } 376 } 377 } 378 return 0; 379 } 380 381 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 382 { 383 if (idx) {if (*idx) PetscFree(*idx);} 384 if (v) {if (*v) PetscFree(*v);} 385 return 0; 386 } 387 388 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 389 { 390 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 391 Mat C; 392 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 393 int shift=a->indexshift,*rows,*cols,bs2=bs*bs; 394 Scalar *array=a->a; 395 396 if (B==PETSC_NULL && mbs!=nbs) 397 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 398 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 399 PetscMemzero(col,(1+nbs)*sizeof(int)); 400 if (shift) { 401 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1; 402 } 403 for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1; 404 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 405 PetscFree(col); 406 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 407 cols = rows + bs; 408 for ( i=0; i<mbs; i++ ) { 409 cols[0] = i*bs; 410 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 411 len = ai[i+1] - ai[i]; 412 for ( j=0; j<len; j++ ) { 413 rows[0] = (*aj++)*bs; 414 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 415 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 416 array += bs2; 417 } 418 } 419 if (shift) { 420 for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1; 421 } 422 423 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 424 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 425 426 if (B != PETSC_NULL) { 427 *B = C; 428 } else { 429 /* This isn't really an in-place transpose */ 430 PetscFree(a->a); 431 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 432 if (a->diag) PetscFree(a->diag); 433 if (a->ilen) PetscFree(a->ilen); 434 if (a->imax) PetscFree(a->imax); 435 if (a->solve_work) PetscFree(a->solve_work); 436 PetscFree(a); 437 PetscMemcpy(A,C,sizeof(struct _Mat)); 438 PetscHeaderDestroy(C); 439 } 440 return 0; 441 } 442 443 444 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 445 { 446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 447 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 448 int m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift; 449 int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 450 Scalar *aa = a->a, *ap; 451 452 if (mode == FLUSH_ASSEMBLY) return 0; 453 454 for ( i=1; i<mbs; i++ ) { 455 /* move each row back by the amount of empty slots (fshift) before it*/ 456 fshift += imax[i-1] - ailen[i-1]; 457 if (fshift) { 458 ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 459 N = ailen[i]; 460 for ( j=0; j<N; j++ ) { 461 ip[j-fshift] = ip[j]; 462 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 463 } 464 } 465 ai[i] = ai[i-1] + ailen[i-1]; 466 } 467 if (mbs) { 468 fshift += imax[mbs-1] - ailen[mbs-1]; 469 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 470 } 471 /* reset ilen and imax for each row */ 472 for ( i=0; i<mbs; i++ ) { 473 ailen[i] = imax[i] = ai[i+1] - ai[i]; 474 } 475 a->nz = (ai[m] + shift)*bs2; 476 477 /* diagonals may have moved, so kill the diagonal pointers */ 478 if (fshift && a->diag) { 479 PetscFree(a->diag); 480 PLogObjectMemory(A,-(m+1)*sizeof(int)); 481 a->diag = 0; 482 } 483 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 484 fshift,a->nz,m); 485 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 486 a->reallocs); 487 return 0; 488 } 489 490 static int MatZeroEntries_SeqBAIJ(Mat A) 491 { 492 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 493 PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 494 return 0; 495 } 496 497 int MatDestroy_SeqBAIJ(PetscObject obj) 498 { 499 Mat A = (Mat) obj; 500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 501 502 #if defined(PETSC_LOG) 503 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 504 #endif 505 PetscFree(a->a); 506 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 507 if (a->diag) PetscFree(a->diag); 508 if (a->ilen) PetscFree(a->ilen); 509 if (a->imax) PetscFree(a->imax); 510 if (a->solve_work) PetscFree(a->solve_work); 511 if (a->mult_work) PetscFree(a->mult_work); 512 PetscFree(a); 513 PLogObjectDestroy(A); 514 PetscHeaderDestroy(A); 515 return 0; 516 } 517 518 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 519 { 520 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 521 if (op == ROW_ORIENTED) a->roworiented = 1; 522 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 523 else if (op == COLUMNS_SORTED) a->sorted = 1; 524 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 525 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 526 else if (op == ROWS_SORTED || 527 op == SYMMETRIC_MATRIX || 528 op == STRUCTURALLY_SYMMETRIC_MATRIX || 529 op == YES_NEW_DIAGONALS) 530 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 531 else if (op == NO_NEW_DIAGONALS) 532 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 533 else 534 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 535 return 0; 536 } 537 538 539 /* -------------------------------------------------------*/ 540 /* Should check that shapes of vectors and matrices match */ 541 /* -------------------------------------------------------*/ 542 #include "pinclude/plapack.h" 543 544 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 545 { 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 547 Scalar *xg,*yg; 548 register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 549 register Scalar x1,x2,x3,x4,x5; 550 int mbs = a->mbs, m = a->m, i, *idx,*ii; 551 int bs = a->bs,j,n,bs2 = bs*bs; 552 553 VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 554 PetscMemzero(y,m*sizeof(Scalar)); 555 x = x; 556 idx = a->j; 557 v = a->a; 558 ii = a->i; 559 560 switch (bs) { 561 case 1: 562 for ( i=0; i<m; i++ ) { 563 n = ii[1] - ii[0]; ii++; 564 sum = 0.0; 565 while (n--) sum += *v++ * x[*idx++]; 566 y[i] = sum; 567 } 568 break; 569 case 2: 570 for ( i=0; i<mbs; i++ ) { 571 n = ii[1] - ii[0]; ii++; 572 sum1 = 0.0; sum2 = 0.0; 573 for ( j=0; j<n; j++ ) { 574 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 575 sum1 += v[0]*x1 + v[2]*x2; 576 sum2 += v[1]*x1 + v[3]*x2; 577 v += 4; 578 } 579 y[0] += sum1; y[1] += sum2; 580 y += 2; 581 } 582 break; 583 case 3: 584 for ( i=0; i<mbs; i++ ) { 585 n = ii[1] - ii[0]; ii++; 586 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 587 for ( j=0; j<n; j++ ) { 588 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 589 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 590 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 591 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 592 v += 9; 593 } 594 y[0] += sum1; y[1] += sum2; y[2] += sum3; 595 y += 3; 596 } 597 break; 598 case 4: 599 for ( i=0; i<mbs; i++ ) { 600 n = ii[1] - ii[0]; ii++; 601 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 602 for ( j=0; j<n; j++ ) { 603 xb = x + 4*(*idx++); 604 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 605 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 606 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 607 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 608 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 609 v += 16; 610 } 611 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 612 y += 4; 613 } 614 break; 615 case 5: 616 for ( i=0; i<mbs; i++ ) { 617 n = ii[1] - ii[0]; ii++; 618 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 619 for ( j=0; j<n; j++ ) { 620 xb = x + 5*(*idx++); 621 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 622 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 623 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 624 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 625 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 626 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 627 v += 25; 628 } 629 y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 630 y += 5; 631 } 632 break; 633 /* block sizes larger then 5 by 5 are handled by BLAS */ 634 default: { 635 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 636 if (!a->mult_work) { 637 a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 638 CHKPTRQ(a->mult_work); 639 } 640 work = a->mult_work; 641 for ( i=0; i<mbs; i++ ) { 642 n = ii[1] - ii[0]; ii++; 643 ncols = n*bs; 644 workt = work; 645 for ( j=0; j<n; j++ ) { 646 xb = x + bs*(*idx++); 647 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 648 workt += bs; 649 } 650 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 651 v += n*bs2; 652 y += bs; 653 } 654 } 655 } 656 PLogFlops(2*bs2*a->nz - m); 657 return 0; 658 } 659 660 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 661 { 662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 663 if (nz) *nz = a->bs*a->bs*a->nz; 664 if (nza) *nza = a->maxnz; 665 if (mem) *mem = (int)A->mem; 666 return 0; 667 } 668 669 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 670 { 671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 672 673 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 674 675 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 676 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 677 (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 678 *flg = PETSC_FALSE; return 0; 679 } 680 681 /* if the a->i are the same */ 682 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 683 *flg = PETSC_FALSE; return 0; 684 } 685 686 /* if a->j are the same */ 687 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 688 *flg = PETSC_FALSE; return 0; 689 } 690 691 /* if a->a are the same */ 692 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 693 *flg = PETSC_FALSE; return 0; 694 } 695 *flg = PETSC_TRUE; 696 return 0; 697 698 } 699 700 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 701 { 702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 703 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 704 Scalar *x, zero = 0.0,*aa,*aa_j; 705 706 bs = a->bs; 707 aa = a->a; 708 ai = a->i; 709 aj = a->j; 710 ambs = a->mbs; 711 bs2 = bs*bs; 712 713 VecSet(&zero,v); 714 VecGetArray(v,&x); VecGetLocalSize(v,&n); 715 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 716 for ( i=0; i<ambs; i++ ) { 717 for ( j=ai[i]; j<ai[i+1]; j++ ) { 718 if (aj[j] == i) { 719 row = i*bs; 720 aa_j = aa+j*bs2; 721 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 722 break; 723 } 724 } 725 } 726 return 0; 727 } 728 729 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 730 { 731 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 732 Scalar *l,*r,x,*v,*aa,*li,*ri; 733 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 734 735 ai = a->i; 736 aj = a->j; 737 aa = a->a; 738 m = a->m; 739 n = a->n; 740 bs = a->bs; 741 mbs = a->mbs; 742 bs2 = bs*bs; 743 if (ll) { 744 VecGetArray(ll,&l); VecGetSize(ll,&lm); 745 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 746 for ( i=0; i<mbs; i++ ) { /* for each block row */ 747 M = ai[i+1] - ai[i]; 748 li = l + i*bs; 749 v = aa + bs2*ai[i]; 750 for ( j=0; j<M; j++ ) { /* for each block */ 751 for ( k=0; k<bs2; k++ ) { 752 (*v++) *= li[k%bs]; 753 } 754 } 755 } 756 } 757 758 if (rr) { 759 VecGetArray(rr,&r); VecGetSize(rr,&rn); 760 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 761 for ( i=0; i<mbs; i++ ) { /* for each block row */ 762 M = ai[i+1] - ai[i]; 763 v = aa + bs2*ai[i]; 764 for ( j=0; j<M; j++ ) { /* for each block */ 765 ri = r + bs*aj[ai[i]+j]; 766 for ( k=0; k<bs; k++ ) { 767 x = ri[k]; 768 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 769 } 770 } 771 } 772 } 773 return 0; 774 } 775 776 777 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 778 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 779 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 780 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 781 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 782 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 783 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 784 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 785 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 786 787 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 788 { 789 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 790 Scalar *v = a->a; 791 double sum = 0.0; 792 int i; 793 794 if (type == NORM_FROBENIUS) { 795 for (i=0; i<a->nz; i++ ) { 796 #if defined(PETSC_COMPLEX) 797 sum += real(conj(*v)*(*v)); v++; 798 #else 799 sum += (*v)*(*v); v++; 800 #endif 801 } 802 *norm = sqrt(sum); 803 } 804 else { 805 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 806 } 807 return 0; 808 } 809 810 /* 811 note: This can only work for identity for row and col. It would 812 be good to check this and otherwise generate an error. 813 */ 814 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 815 { 816 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 817 Mat outA; 818 int ierr; 819 820 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 821 822 outA = inA; 823 inA->factor = FACTOR_LU; 824 a->row = row; 825 a->col = col; 826 827 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 828 829 if (!a->diag) { 830 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 831 } 832 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 833 return 0; 834 } 835 836 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 837 { 838 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 839 int one = 1, totalnz = a->bs*a->bs*a->nz; 840 BLscal_( &totalnz, alpha, a->a, &one ); 841 PLogFlops(totalnz); 842 return 0; 843 } 844 845 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 846 { 847 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 848 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 849 int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 850 int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 851 Scalar *ap, *aa = a->a, zero = 0.0; 852 853 for ( k=0; k<m; k++ ) { /* loop over rows */ 854 row = im[k]; brow = row/bs; 855 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 856 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 857 rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 858 nrow = ailen[brow]; 859 for ( l=0; l<n; l++ ) { /* loop over columns */ 860 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 861 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 862 col = in[l] - shift; 863 bcol = col/bs; 864 cidx = col%bs; 865 ridx = row%bs; 866 high = nrow; 867 low = 0; /* assume unsorted */ 868 while (high-low > 5) { 869 t = (low+high)/2; 870 if (rp[t] > bcol) high = t; 871 else low = t; 872 } 873 for ( i=low; i<high; i++ ) { 874 if (rp[i] > bcol) break; 875 if (rp[i] == bcol) { 876 *v++ = ap[bs2*i+bs*cidx+ridx]; 877 goto finished; 878 } 879 } 880 *v++ = zero; 881 finished:; 882 } 883 } 884 return 0; 885 } 886 887 int MatPrintHelp_SeqBAIJ(Mat A) 888 { 889 static int called = 0; 890 891 if (called) return 0; else called = 1; 892 return 0; 893 } 894 /* -------------------------------------------------------------------*/ 895 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 896 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 897 MatMult_SeqBAIJ,0, 898 0,0, 899 MatSolve_SeqBAIJ,0, 900 0,0, 901 MatLUFactor_SeqBAIJ,0, 902 0, 903 MatTranspose_SeqBAIJ, 904 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 905 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 906 0,MatAssemblyEnd_SeqBAIJ, 907 0, 908 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 909 MatGetReordering_SeqBAIJ, 910 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 911 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 912 MatILUFactorSymbolic_SeqBAIJ,0, 913 0,0,/* MatConvert_SeqBAIJ */ 0, 914 0,0, 915 MatConvertSameType_SeqBAIJ,0,0, 916 MatILUFactor_SeqBAIJ,0,0, 917 0,0, 918 MatGetValues_SeqBAIJ,0, 919 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 920 0}; 921 922 /*@C 923 MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format 924 (the default parallel PETSc format). For good matrix assembly performance 925 the user should preallocate the matrix storage by setting the parameter nz 926 (or nzz). By setting these parameters accurately, performance can be 927 increased by more than a factor of 50. 928 929 Input Parameters: 930 . comm - MPI communicator, set to MPI_COMM_SELF 931 . bs - size of block 932 . m - number of rows 933 . n - number of columns 934 . nz - number of block nonzeros per block row (same for all rows) 935 . nzz - number of block nonzeros per block row or PETSC_NULL 936 (possibly different for each row) 937 938 Output Parameter: 939 . A - the matrix 940 941 Notes: 942 The BAIJ format, is fully compatible with standard Fortran 77 943 storage. That is, the stored row and column indices can begin at 944 either one (as in Fortran) or zero. See the users manual for details. 945 946 Specify the preallocated storage with either nz or nnz (not both). 947 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 948 allocation. For additional details, see the users manual chapter on 949 matrices and the file $(PETSC_DIR)/Performance. 950 951 .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues() 952 @*/ 953 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 954 { 955 Mat B; 956 Mat_SeqBAIJ *b; 957 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 958 959 if (mbs*bs!=m || nbs*bs!=n) 960 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 961 962 *A = 0; 963 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 964 PLogObjectCreate(B); 965 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 966 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 967 switch (bs) { 968 case 1: 969 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 970 break; 971 case 2: 972 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 973 break; 974 case 3: 975 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 976 break; 977 case 4: 978 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 979 break; 980 case 5: 981 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 982 break; 983 } 984 B->destroy = MatDestroy_SeqBAIJ; 985 B->view = MatView_SeqBAIJ; 986 B->factor = 0; 987 B->lupivotthreshold = 1.0; 988 b->row = 0; 989 b->col = 0; 990 b->reallocs = 0; 991 992 b->m = m; 993 b->mbs = mbs; 994 b->n = n; 995 b->nbs = nbs; 996 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 997 if (nnz == PETSC_NULL) { 998 if (nz == PETSC_DEFAULT) nz = CHUNKSIZE; 999 else if (nz <= 0) nz = 1; 1000 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1001 nz = nz*mbs; 1002 } 1003 else { 1004 nz = 0; 1005 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1006 } 1007 1008 /* allocate the matrix space */ 1009 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1010 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1011 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1012 b->j = (int *) (b->a + nz*bs2); 1013 PetscMemzero(b->j,nz*sizeof(int)); 1014 b->i = b->j + nz; 1015 b->singlemalloc = 1; 1016 1017 b->i[0] = 0; 1018 for (i=1; i<mbs+1; i++) { 1019 b->i[i] = b->i[i-1] + b->imax[i-1]; 1020 } 1021 1022 /* b->ilen will count nonzeros in each block row so far. */ 1023 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1024 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1025 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1026 1027 b->bs = bs; 1028 b->mbs = mbs; 1029 b->nz = 0; 1030 b->maxnz = nz; 1031 b->sorted = 0; 1032 b->roworiented = 1; 1033 b->nonew = 0; 1034 b->diag = 0; 1035 b->solve_work = 0; 1036 b->mult_work = 0; 1037 b->spptr = 0; 1038 1039 *A = B; 1040 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1041 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1042 return 0; 1043 } 1044 1045 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1046 { 1047 Mat C; 1048 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1049 int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 1050 1051 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1052 1053 *B = 0; 1054 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1055 PLogObjectCreate(C); 1056 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1057 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1058 C->destroy = MatDestroy_SeqBAIJ; 1059 C->view = MatView_SeqBAIJ; 1060 C->factor = A->factor; 1061 c->row = 0; 1062 c->col = 0; 1063 C->assembled = PETSC_TRUE; 1064 1065 c->m = a->m; 1066 c->n = a->n; 1067 c->bs = a->bs; 1068 c->mbs = a->mbs; 1069 c->nbs = a->nbs; 1070 1071 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1072 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1073 for ( i=0; i<mbs; i++ ) { 1074 c->imax[i] = a->imax[i]; 1075 c->ilen[i] = a->ilen[i]; 1076 } 1077 1078 /* allocate the matrix space */ 1079 c->singlemalloc = 1; 1080 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1081 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1082 c->j = (int *) (c->a + nz*bs2); 1083 c->i = c->j + nz; 1084 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1085 if (mbs > 0) { 1086 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1087 if (cpvalues == COPY_VALUES) { 1088 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1089 } 1090 } 1091 1092 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1093 c->sorted = a->sorted; 1094 c->roworiented = a->roworiented; 1095 c->nonew = a->nonew; 1096 1097 if (a->diag) { 1098 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1099 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1100 for ( i=0; i<mbs; i++ ) { 1101 c->diag[i] = a->diag[i]; 1102 } 1103 } 1104 else c->diag = 0; 1105 c->nz = a->nz; 1106 c->maxnz = a->maxnz; 1107 c->solve_work = 0; 1108 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1109 c->mult_work = 0; 1110 *B = C; 1111 return 0; 1112 } 1113 1114 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1115 { 1116 Mat_SeqBAIJ *a; 1117 Mat B; 1118 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1119 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1120 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1121 int *masked, nmask,tmp,ishift,bs2; 1122 Scalar *aa; 1123 MPI_Comm comm = ((PetscObject) viewer)->comm; 1124 1125 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1126 bs2 = bs*bs; 1127 1128 MPI_Comm_size(comm,&size); 1129 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1130 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1131 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1132 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1133 M = header[1]; N = header[2]; nz = header[3]; 1134 1135 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1136 1137 /* 1138 This code adds extra rows to make sure the number of rows is 1139 divisible by the blocksize 1140 */ 1141 mbs = M/bs; 1142 extra_rows = bs - M + bs*(mbs); 1143 if (extra_rows == bs) extra_rows = 0; 1144 else mbs++; 1145 if (extra_rows) { 1146 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1147 } 1148 1149 /* read in row lengths */ 1150 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1151 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1152 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1153 1154 /* read in column indices */ 1155 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1156 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1157 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1158 1159 /* loop over row lengths determining block row lengths */ 1160 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1161 PetscMemzero(browlengths,mbs*sizeof(int)); 1162 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1163 PetscMemzero(mask,mbs*sizeof(int)); 1164 masked = mask + mbs; 1165 rowcount = 0; nzcount = 0; 1166 for ( i=0; i<mbs; i++ ) { 1167 nmask = 0; 1168 for ( j=0; j<bs; j++ ) { 1169 kmax = rowlengths[rowcount]; 1170 for ( k=0; k<kmax; k++ ) { 1171 tmp = jj[nzcount++]/bs; 1172 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1173 } 1174 rowcount++; 1175 } 1176 browlengths[i] += nmask; 1177 /* zero out the mask elements we set */ 1178 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1179 } 1180 1181 /* create our matrix */ 1182 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1183 CHKERRQ(ierr); 1184 B = *A; 1185 a = (Mat_SeqBAIJ *) B->data; 1186 1187 /* set matrix "i" values */ 1188 a->i[0] = 0; 1189 for ( i=1; i<= mbs; i++ ) { 1190 a->i[i] = a->i[i-1] + browlengths[i-1]; 1191 a->ilen[i-1] = browlengths[i-1]; 1192 } 1193 a->indexshift = 0; 1194 a->nz = 0; 1195 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1196 1197 /* read in nonzero values */ 1198 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1199 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1200 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1201 1202 /* set "a" and "j" values into matrix */ 1203 nzcount = 0; jcount = 0; 1204 for ( i=0; i<mbs; i++ ) { 1205 nzcountb = nzcount; 1206 nmask = 0; 1207 for ( j=0; j<bs; j++ ) { 1208 kmax = rowlengths[i*bs+j]; 1209 for ( k=0; k<kmax; k++ ) { 1210 tmp = jj[nzcount++]/bs; 1211 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1212 } 1213 rowcount++; 1214 } 1215 /* sort the masked values */ 1216 PetscSortInt(nmask,masked); 1217 1218 /* set "j" values into matrix */ 1219 maskcount = 1; 1220 for ( j=0; j<nmask; j++ ) { 1221 a->j[jcount++] = masked[j]; 1222 mask[masked[j]] = maskcount++; 1223 } 1224 /* set "a" values into matrix */ 1225 ishift = bs2*a->i[i]; 1226 for ( j=0; j<bs; j++ ) { 1227 kmax = rowlengths[i*bs+j]; 1228 for ( k=0; k<kmax; k++ ) { 1229 tmp = jj[nzcountb]/bs ; 1230 block = mask[tmp] - 1; 1231 point = jj[nzcountb] - bs*tmp; 1232 idx = ishift + bs2*block + j + bs*point; 1233 a->a[idx] = aa[nzcountb++]; 1234 } 1235 } 1236 /* zero out the mask elements we set */ 1237 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1238 } 1239 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1240 1241 PetscFree(rowlengths); 1242 PetscFree(browlengths); 1243 PetscFree(aa); 1244 PetscFree(jj); 1245 PetscFree(mask); 1246 1247 B->assembled = PETSC_TRUE; 1248 1249 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1250 if (flg) { 1251 Viewer tviewer; 1252 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1253 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1254 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1255 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1256 } 1257 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1258 if (flg) { 1259 Viewer tviewer; 1260 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1261 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1262 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1263 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1264 } 1265 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1266 if (flg) { 1267 Viewer tviewer; 1268 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1269 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1270 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1271 } 1272 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1273 if (flg) { 1274 Viewer tviewer; 1275 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1276 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1277 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1278 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1279 } 1280 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1281 if (flg) { 1282 Viewer tviewer; 1283 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1284 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1285 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1286 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1287 } 1288 return 0; 1289 } 1290 1291 1292 1293