1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 560 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 565 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 const MatScalar *value = v; 602 MatScalar *ap,*aa = a->a,*bap; 603 604 PetscFunctionBegin; 605 if (roworiented) { 606 stepval = (n-1)*bs; 607 } else { 608 stepval = (m-1)*bs; 609 } 610 for (k=0; k<m; k++) { /* loop over added rows */ 611 row = im[k]; 612 if (row < 0) continue; 613 #if defined(PETSC_USE_BOPT_g) 614 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 615 #endif 616 rp = aj + ai[row]; 617 ap = aa + bs2*ai[row]; 618 rmax = imax[row]; 619 nrow = ailen[row]; 620 low = 0; 621 for (l=0; l<n; l++) { /* loop over added columns */ 622 if (in[l] < 0) continue; 623 #if defined(PETSC_USE_BOPT_g) 624 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 625 #endif 626 col = in[l]; 627 if (col < row) continue; /* ignore lower triangular block */ 628 if (roworiented) { 629 value = v + k*(stepval+bs)*bs + l*bs; 630 } else { 631 value = v + l*(stepval+bs)*bs + k*bs; 632 } 633 if (!sorted) low = 0; high = nrow; 634 while (high-low > 7) { 635 t = (low+high)/2; 636 if (rp[t] > col) high = t; 637 else low = t; 638 } 639 for (i=low; i<high; i++) { 640 if (rp[i] > col) break; 641 if (rp[i] == col) { 642 bap = ap + bs2*i; 643 if (roworiented) { 644 if (is == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 bap[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 bap[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 if (is == ADD_VALUES) { 659 for (ii=0; ii<bs; ii++,value+=stepval) { 660 for (jj=0; jj<bs; jj++) { 661 *bap++ += *value++; 662 } 663 } 664 } else { 665 for (ii=0; ii<bs; ii++,value+=stepval) { 666 for (jj=0; jj<bs; jj++) { 667 *bap++ = *value++; 668 } 669 } 670 } 671 } 672 goto noinsert2; 673 } 674 } 675 if (nonew == 1) goto noinsert2; 676 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 677 if (nrow >= rmax) { 678 /* there is no extra room in row, therefore enlarge */ 679 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 680 MatScalar *new_a; 681 682 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 683 684 /* malloc new storage space */ 685 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 686 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 687 new_j = (int*)(new_a + bs2*new_nz); 688 new_i = new_j + new_nz; 689 690 /* copy over old data into new slots */ 691 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 692 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 693 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 694 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 695 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 698 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 699 /* free up old matrix storage */ 700 ierr = PetscFree(a->a);CHKERRQ(ierr); 701 if (!a->singlemalloc) { 702 ierr = PetscFree(a->i);CHKERRQ(ierr); 703 ierr = PetscFree(a->j);CHKERRQ(ierr); 704 } 705 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 706 a->singlemalloc = PETSC_TRUE; 707 708 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 709 rmax = imax[row] = imax[row] + CHUNKSIZE; 710 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 711 a->s_maxnz += bs2*CHUNKSIZE; 712 a->reallocs++; 713 a->s_nz++; 714 } 715 N = nrow++ - 1; 716 /* shift up all the later entries in this row */ 717 for (ii=N; ii>=i; ii--) { 718 rp[ii+1] = rp[ii]; 719 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 if (N >= i) { 722 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 rp[i] = col; 725 bap = ap + bs2*i; 726 if (roworiented) { 727 for (ii=0; ii<bs; ii++,value+=stepval) { 728 for (jj=ii; jj<bs2; jj+=bs) { 729 bap[jj] = *value++; 730 } 731 } 732 } else { 733 for (ii=0; ii<bs; ii++,value+=stepval) { 734 for (jj=0; jj<bs; jj++) { 735 *bap++ = *value++; 736 } 737 } 738 } 739 noinsert2:; 740 low = i; 741 } 742 ailen[row] = nrow; 743 } 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 750 { 751 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 752 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 753 int m = A->m,*ip,N,*ailen = a->ilen; 754 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 755 MatScalar *aa = a->a,*ap; 756 757 PetscFunctionBegin; 758 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 759 760 if (m) rmax = ailen[0]; 761 for (i=1; i<mbs; i++) { 762 /* move each row back by the amount of empty slots (fshift) before it*/ 763 fshift += imax[i-1] - ailen[i-1]; 764 rmax = PetscMax(rmax,ailen[i]); 765 if (fshift) { 766 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 767 N = ailen[i]; 768 for (j=0; j<N; j++) { 769 ip[j-fshift] = ip[j]; 770 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 771 } 772 } 773 ai[i] = ai[i-1] + ailen[i-1]; 774 } 775 if (mbs) { 776 fshift += imax[mbs-1] - ailen[mbs-1]; 777 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 778 } 779 /* reset ilen and imax for each row */ 780 for (i=0; i<mbs; i++) { 781 ailen[i] = imax[i] = ai[i+1] - ai[i]; 782 } 783 a->s_nz = ai[mbs]; 784 785 /* diagonals may have moved, reset it */ 786 if (a->diag) { 787 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 788 } 789 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 790 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 791 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 792 a->reallocs); 793 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 794 a->reallocs = 0; 795 A->info.nz_unneeded = (PetscReal)fshift*bs2; 796 797 PetscFunctionReturn(0); 798 } 799 800 /* 801 This function returns an array of flags which indicate the locations of contiguous 802 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 803 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 804 Assume: sizes should be long enough to hold all the values. 805 */ 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 808 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 809 { 810 int i,j,k,row; 811 PetscTruth flg; 812 813 PetscFunctionBegin; 814 for (i=0,j=0; i<n; j++) { 815 row = idx[i]; 816 if (row%bs!=0) { /* Not the begining of a block */ 817 sizes[j] = 1; 818 i++; 819 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 820 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 821 i++; 822 } else { /* Begining of the block, so check if the complete block exists */ 823 flg = PETSC_TRUE; 824 for (k=1; k<bs; k++) { 825 if (row+k != idx[i+k]) { /* break in the block */ 826 flg = PETSC_FALSE; 827 break; 828 } 829 } 830 if (flg == PETSC_TRUE) { /* No break in the bs */ 831 sizes[j] = bs; 832 i+= bs; 833 } else { 834 sizes[j] = 1; 835 i++; 836 } 837 } 838 } 839 *bs_max = j; 840 PetscFunctionReturn(0); 841 } 842 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 845 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 846 { 847 PetscFunctionBegin; 848 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 849 } 850 851 /* Only add/insert a(i,j) with i<=j (blocks). 852 Any a(i,j) with i>j input by user is ingored. 853 */ 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 857 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 858 { 859 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 860 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 861 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 862 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 863 int ridx,cidx,bs2=a->bs2,ierr; 864 MatScalar *ap,value,*aa=a->a,*bap; 865 866 PetscFunctionBegin; 867 868 for (k=0; k<m; k++) { /* loop over added rows */ 869 row = im[k]; /* row number */ 870 brow = row/bs; /* block row number */ 871 if (row < 0) continue; 872 #if defined(PETSC_USE_BOPT_g) 873 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 874 #endif 875 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 876 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 877 rmax = imax[brow]; /* maximum space allocated for this row */ 878 nrow = ailen[brow]; /* actual length of this row */ 879 low = 0; 880 881 for (l=0; l<n; l++) { /* loop over added columns */ 882 if (in[l] < 0) continue; 883 #if defined(PETSC_USE_BOPT_g) 884 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 885 #endif 886 col = in[l]; 887 bcol = col/bs; /* block col number */ 888 889 if (brow <= bcol){ 890 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 891 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 892 /* element value a(k,l) */ 893 if (roworiented) { 894 value = v[l + k*n]; 895 } else { 896 value = v[k + l*m]; 897 } 898 899 /* move pointer bap to a(k,l) quickly and add/insert value */ 900 if (!sorted) low = 0; high = nrow; 901 while (high-low > 7) { 902 t = (low+high)/2; 903 if (rp[t] > bcol) high = t; 904 else low = t; 905 } 906 for (i=low; i<high; i++) { 907 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 908 if (rp[i] > bcol) break; 909 if (rp[i] == bcol) { 910 bap = ap + bs2*i + bs*cidx + ridx; 911 if (is == ADD_VALUES) *bap += value; 912 else *bap = value; 913 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 914 if (brow == bcol && ridx < cidx){ 915 bap = ap + bs2*i + bs*ridx + cidx; 916 if (is == ADD_VALUES) *bap += value; 917 else *bap = value; 918 } 919 goto noinsert1; 920 } 921 } 922 923 if (nonew == 1) goto noinsert1; 924 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 925 if (nrow >= rmax) { 926 /* there is no extra room in row, therefore enlarge */ 927 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 928 MatScalar *new_a; 929 930 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 931 932 /* Malloc new storage space */ 933 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 934 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 935 new_j = (int*)(new_a + bs2*new_nz); 936 new_i = new_j + new_nz; 937 938 /* copy over old data into new slots */ 939 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 940 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 941 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 942 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 943 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 944 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 945 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 946 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 947 /* free up old matrix storage */ 948 ierr = PetscFree(a->a);CHKERRQ(ierr); 949 if (!a->singlemalloc) { 950 ierr = PetscFree(a->i);CHKERRQ(ierr); 951 ierr = PetscFree(a->j);CHKERRQ(ierr); 952 } 953 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 954 a->singlemalloc = PETSC_TRUE; 955 956 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 957 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 958 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 959 a->s_maxnz += bs2*CHUNKSIZE; 960 a->reallocs++; 961 a->s_nz++; 962 } 963 964 N = nrow++ - 1; 965 /* shift up all the later entries in this row */ 966 for (ii=N; ii>=i; ii--) { 967 rp[ii+1] = rp[ii]; 968 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 969 } 970 if (N>=i) { 971 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 972 } 973 rp[i] = bcol; 974 ap[bs2*i + bs*cidx + ridx] = value; 975 noinsert1:; 976 low = i; 977 /* } */ 978 } 979 } /* end of if .. if.. */ 980 } /* end of loop over added columns */ 981 ailen[brow] = nrow; 982 } /* end of loop over added rows */ 983 984 PetscFunctionReturn(0); 985 } 986 987 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 988 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 989 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 990 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 991 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 992 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 993 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 994 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 995 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 996 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 997 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 998 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 999 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1000 extern int MatZeroEntries_SeqSBAIJ(Mat); 1001 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1002 1003 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1004 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1005 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1006 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1007 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1008 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1009 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1010 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1011 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1012 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1013 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1014 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1015 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1016 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1017 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1018 1019 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1020 1021 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1022 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1023 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1024 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1025 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1026 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1027 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1028 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1029 1030 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1031 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1032 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1038 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1039 1040 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1041 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1042 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1043 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1044 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1045 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1046 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1047 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1048 1049 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1050 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1051 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1052 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1053 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1054 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1055 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1056 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1057 1058 #ifdef HAVE_MatICCFactor 1059 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1062 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1063 { 1064 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1065 Mat outA; 1066 int ierr; 1067 PetscTruth row_identity,col_identity; 1068 1069 PetscFunctionBegin; 1070 /* 1071 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1072 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1073 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1074 if (!row_identity || !col_identity) { 1075 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1076 } 1077 */ 1078 1079 outA = inA; 1080 inA->factor = FACTOR_CHOLESKY; 1081 1082 if (!a->diag) { 1083 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1084 } 1085 /* 1086 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1087 for ILU(0) factorization with natural ordering 1088 */ 1089 switch (a->bs) { 1090 case 1: 1091 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1092 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1093 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1094 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1095 case 2: 1096 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1097 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1098 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1099 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1100 break; 1101 case 3: 1102 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1103 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1104 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1105 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1106 break; 1107 case 4: 1108 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1109 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1110 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1111 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1112 break; 1113 case 5: 1114 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1115 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1116 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1117 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1118 break; 1119 case 6: 1120 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1121 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1122 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1123 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1124 break; 1125 case 7: 1126 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1127 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1128 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1129 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1130 break; 1131 default: 1132 a->row = row; 1133 a->icol = col; 1134 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1135 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1136 1137 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1138 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1139 PetscLogObjectParent(inA,a->icol); 1140 1141 if (!a->solve_work) { 1142 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1143 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1144 } 1145 } 1146 1147 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1148 1149 PetscFunctionReturn(0); 1150 } 1151 #endif 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1155 int MatPrintHelp_SeqSBAIJ(Mat A) 1156 { 1157 static PetscTruth called = PETSC_FALSE; 1158 MPI_Comm comm = A->comm; 1159 int ierr; 1160 1161 PetscFunctionBegin; 1162 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1163 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1164 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 EXTERN_C_BEGIN 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1171 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1172 { 1173 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1174 int i,nz,n; 1175 1176 PetscFunctionBegin; 1177 nz = baij->s_maxnz; 1178 n = mat->n; 1179 for (i=0; i<nz; i++) { 1180 baij->j[i] = indices[i]; 1181 } 1182 baij->s_nz = nz; 1183 for (i=0; i<n; i++) { 1184 baij->ilen[i] = baij->imax[i]; 1185 } 1186 1187 PetscFunctionReturn(0); 1188 } 1189 EXTERN_C_END 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1193 /*@ 1194 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1195 in the matrix. 1196 1197 Input Parameters: 1198 + mat - the SeqSBAIJ matrix 1199 - indices - the column indices 1200 1201 Level: advanced 1202 1203 Notes: 1204 This can be called if you have precomputed the nonzero structure of the 1205 matrix and want to provide it to the matrix object to improve the performance 1206 of the MatSetValues() operation. 1207 1208 You MUST have set the correct numbers of nonzeros per row in the call to 1209 MatCreateSeqSBAIJ(). 1210 1211 MUST be called before any calls to MatSetValues() 1212 1213 .seealso: MatCreateSeqSBAIJ 1214 @*/ 1215 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1216 { 1217 int ierr,(*f)(Mat,int *); 1218 1219 PetscFunctionBegin; 1220 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1221 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1222 if (f) { 1223 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1224 } else { 1225 SETERRQ(1,"Wrong type of matrix to set column indices"); 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1232 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1233 { 1234 int ierr; 1235 1236 PetscFunctionBegin; 1237 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1243 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1244 { 1245 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1246 PetscFunctionBegin; 1247 *array = a->a; 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1253 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1254 { 1255 PetscFunctionBegin; 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #include "petscblaslapack.h" 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1262 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1263 { 1264 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1265 int ierr,one=1,i,bs=y->bs,bs2,j; 1266 1267 PetscFunctionBegin; 1268 if (str == SAME_NONZERO_PATTERN) { 1269 BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1270 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1271 if (y->xtoy && y->XtoY != X) { 1272 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1273 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1274 } 1275 if (!y->xtoy) { /* get xtoy */ 1276 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1277 y->XtoY = X; 1278 } 1279 bs2 = bs*bs; 1280 for (i=0; i<x->s_nz; i++) { 1281 j = 0; 1282 while (j < bs2){ 1283 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1284 j++; 1285 } 1286 } 1287 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1288 } else { 1289 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1290 } 1291 PetscFunctionReturn(0); 1292 } 1293 1294 /* -------------------------------------------------------------------*/ 1295 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1296 MatGetRow_SeqSBAIJ, 1297 MatRestoreRow_SeqSBAIJ, 1298 MatMult_SeqSBAIJ_N, 1299 MatMultAdd_SeqSBAIJ_N, 1300 MatMultTranspose_SeqSBAIJ, 1301 MatMultTransposeAdd_SeqSBAIJ, 1302 MatSolve_SeqSBAIJ_N, 1303 0, 1304 0, 1305 0, 1306 0, 1307 MatCholeskyFactor_SeqSBAIJ, 1308 MatRelax_SeqSBAIJ, 1309 MatTranspose_SeqSBAIJ, 1310 MatGetInfo_SeqSBAIJ, 1311 MatEqual_SeqSBAIJ, 1312 MatGetDiagonal_SeqSBAIJ, 1313 MatDiagonalScale_SeqSBAIJ, 1314 MatNorm_SeqSBAIJ, 1315 0, 1316 MatAssemblyEnd_SeqSBAIJ, 1317 0, 1318 MatSetOption_SeqSBAIJ, 1319 MatZeroEntries_SeqSBAIJ, 1320 MatZeroRows_SeqSBAIJ, 1321 0, 1322 0, 1323 MatCholeskyFactorSymbolic_SeqSBAIJ, 1324 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1325 MatSetUpPreallocation_SeqSBAIJ, 1326 0, 1327 MatICCFactorSymbolic_SeqSBAIJ, 1328 MatGetArray_SeqSBAIJ, 1329 MatRestoreArray_SeqSBAIJ, 1330 MatDuplicate_SeqSBAIJ, 1331 0, 1332 0, 1333 0, 1334 0, 1335 MatAXPY_SeqSBAIJ, 1336 MatGetSubMatrices_SeqSBAIJ, 1337 MatIncreaseOverlap_SeqSBAIJ, 1338 MatGetValues_SeqSBAIJ, 1339 0, 1340 MatPrintHelp_SeqSBAIJ, 1341 MatScale_SeqSBAIJ, 1342 0, 1343 0, 1344 0, 1345 MatGetBlockSize_SeqSBAIJ, 1346 MatGetRowIJ_SeqSBAIJ, 1347 MatRestoreRowIJ_SeqSBAIJ, 1348 0, 1349 0, 1350 0, 1351 0, 1352 0, 1353 0, 1354 MatSetValuesBlocked_SeqSBAIJ, 1355 MatGetSubMatrix_SeqSBAIJ, 1356 0, 1357 0, 1358 MatGetPetscMaps_Petsc, 1359 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 MatGetRowMax_SeqSBAIJ, 1366 0, 1367 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 #if !defined(PETSC_USE_COMPLEX) 1380 MatGetInertia_SeqSBAIJ 1381 #else 1382 0 1383 #endif 1384 }; 1385 1386 EXTERN_C_BEGIN 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1389 int MatStoreValues_SeqSBAIJ(Mat mat) 1390 { 1391 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1392 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1393 int ierr; 1394 1395 PetscFunctionBegin; 1396 if (aij->nonew != 1) { 1397 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1398 } 1399 1400 /* allocate space for values if not already there */ 1401 if (!aij->saved_values) { 1402 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1403 } 1404 1405 /* copy values over */ 1406 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 EXTERN_C_END 1410 1411 EXTERN_C_BEGIN 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1414 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1415 { 1416 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1417 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1418 1419 PetscFunctionBegin; 1420 if (aij->nonew != 1) { 1421 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1422 } 1423 if (!aij->saved_values) { 1424 SETERRQ(1,"Must call MatStoreValues(A);first"); 1425 } 1426 1427 /* copy values over */ 1428 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 EXTERN_C_END 1432 1433 EXTERN_C_BEGIN 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1436 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1437 { 1438 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1439 int i,len,ierr,mbs,bs2; 1440 PetscTruth flg; 1441 int s_nz; 1442 1443 PetscFunctionBegin; 1444 B->preallocated = PETSC_TRUE; 1445 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1446 mbs = B->m/bs; 1447 bs2 = bs*bs; 1448 1449 if (mbs*bs != B->m) { 1450 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1451 } 1452 1453 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1454 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1455 if (nnz) { 1456 for (i=0; i<mbs; i++) { 1457 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1458 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1459 } 1460 } 1461 1462 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1463 if (!flg) { 1464 switch (bs) { 1465 case 1: 1466 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1467 B->ops->solve = MatSolve_SeqSBAIJ_1; 1468 B->ops->solves = MatSolves_SeqSBAIJ_1; 1469 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1470 B->ops->mult = MatMult_SeqSBAIJ_1; 1471 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1472 break; 1473 case 2: 1474 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1475 B->ops->solve = MatSolve_SeqSBAIJ_2; 1476 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1477 B->ops->mult = MatMult_SeqSBAIJ_2; 1478 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1479 break; 1480 case 3: 1481 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1482 B->ops->solve = MatSolve_SeqSBAIJ_3; 1483 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1484 B->ops->mult = MatMult_SeqSBAIJ_3; 1485 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1486 break; 1487 case 4: 1488 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1489 B->ops->solve = MatSolve_SeqSBAIJ_4; 1490 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1491 B->ops->mult = MatMult_SeqSBAIJ_4; 1492 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1493 break; 1494 case 5: 1495 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1496 B->ops->solve = MatSolve_SeqSBAIJ_5; 1497 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1498 B->ops->mult = MatMult_SeqSBAIJ_5; 1499 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1500 break; 1501 case 6: 1502 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1503 B->ops->solve = MatSolve_SeqSBAIJ_6; 1504 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1505 B->ops->mult = MatMult_SeqSBAIJ_6; 1506 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1507 break; 1508 case 7: 1509 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1510 B->ops->solve = MatSolve_SeqSBAIJ_7; 1511 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1512 B->ops->mult = MatMult_SeqSBAIJ_7; 1513 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1514 break; 1515 } 1516 } 1517 1518 b->mbs = mbs; 1519 b->nbs = mbs; 1520 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1521 if (!nnz) { 1522 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1523 else if (nz <= 0) nz = 1; 1524 for (i=0; i<mbs; i++) { 1525 b->imax[i] = nz; 1526 } 1527 nz = nz*mbs; /* total nz */ 1528 } else { 1529 nz = 0; 1530 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1531 } 1532 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1533 s_nz = nz; 1534 1535 /* allocate the matrix space */ 1536 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1537 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1538 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1539 b->j = (int*)(b->a + s_nz*bs2); 1540 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1541 b->i = b->j + s_nz; 1542 b->singlemalloc = PETSC_TRUE; 1543 1544 /* pointer to beginning of each row */ 1545 b->i[0] = 0; 1546 for (i=1; i<mbs+1; i++) { 1547 b->i[i] = b->i[i-1] + b->imax[i-1]; 1548 } 1549 1550 /* b->ilen will count nonzeros in each block row so far. */ 1551 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1552 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1553 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1554 1555 b->bs = bs; 1556 b->bs2 = bs2; 1557 b->s_nz = 0; 1558 b->s_maxnz = s_nz*bs2; 1559 1560 b->inew = 0; 1561 b->jnew = 0; 1562 b->anew = 0; 1563 b->a2anew = 0; 1564 b->permute = PETSC_FALSE; 1565 PetscFunctionReturn(0); 1566 } 1567 EXTERN_C_END 1568 1569 EXTERN_C_BEGIN 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1572 int MatCreate_SeqSBAIJ(Mat B) 1573 { 1574 Mat_SeqSBAIJ *b; 1575 int ierr,size; 1576 1577 PetscFunctionBegin; 1578 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1579 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1580 B->m = B->M = PetscMax(B->m,B->M); 1581 B->n = B->N = PetscMax(B->n,B->N); 1582 1583 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1584 B->data = (void*)b; 1585 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1586 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1587 B->ops->destroy = MatDestroy_SeqSBAIJ; 1588 B->ops->view = MatView_SeqSBAIJ; 1589 B->factor = 0; 1590 B->lupivotthreshold = 1.0; 1591 B->mapping = 0; 1592 b->row = 0; 1593 b->icol = 0; 1594 b->reallocs = 0; 1595 b->saved_values = 0; 1596 1597 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1598 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1599 1600 b->sorted = PETSC_FALSE; 1601 b->roworiented = PETSC_TRUE; 1602 b->nonew = 0; 1603 b->diag = 0; 1604 b->solve_work = 0; 1605 b->mult_work = 0; 1606 B->spptr = 0; 1607 b->keepzeroedrows = PETSC_FALSE; 1608 b->xtoy = 0; 1609 b->XtoY = 0; 1610 1611 b->inew = 0; 1612 b->jnew = 0; 1613 b->anew = 0; 1614 b->a2anew = 0; 1615 b->permute = PETSC_FALSE; 1616 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1618 "MatStoreValues_SeqSBAIJ", 1619 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1621 "MatRetrieveValues_SeqSBAIJ", 1622 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1624 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1625 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1627 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1628 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 EXTERN_C_END 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1635 /*@C 1636 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1637 compressed row) format. For good matrix assembly performance the 1638 user should preallocate the matrix storage by setting the parameter nz 1639 (or the array nnz). By setting these parameters accurately, performance 1640 during matrix assembly can be increased by more than a factor of 50. 1641 1642 Collective on Mat 1643 1644 Input Parameters: 1645 + A - the symmetric matrix 1646 . bs - size of block 1647 . nz - number of block nonzeros per block row (same for all rows) 1648 - nnz - array containing the number of block nonzeros in the upper triangular plus 1649 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1650 1651 Options Database Keys: 1652 . -mat_no_unroll - uses code that does not unroll the loops in the 1653 block calculations (much slower) 1654 . -mat_block_size - size of the blocks to use 1655 1656 Level: intermediate 1657 1658 Notes: 1659 Specify the preallocated storage with either nz or nnz (not both). 1660 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1661 allocation. For additional details, see the users manual chapter on 1662 matrices. 1663 1664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1665 @*/ 1666 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1667 int ierr,(*f)(Mat,int,int,const int[]); 1668 1669 PetscFunctionBegin; 1670 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1671 if (f) { 1672 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1673 } 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatCreateSeqSBAIJ" 1679 /*@C 1680 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1681 compressed row) format. For good matrix assembly performance the 1682 user should preallocate the matrix storage by setting the parameter nz 1683 (or the array nnz). By setting these parameters accurately, performance 1684 during matrix assembly can be increased by more than a factor of 50. 1685 1686 Collective on MPI_Comm 1687 1688 Input Parameters: 1689 + comm - MPI communicator, set to PETSC_COMM_SELF 1690 . bs - size of block 1691 . m - number of rows, or number of columns 1692 . nz - number of block nonzeros per block row (same for all rows) 1693 - nnz - array containing the number of block nonzeros in the upper triangular plus 1694 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1695 1696 Output Parameter: 1697 . A - the symmetric matrix 1698 1699 Options Database Keys: 1700 . -mat_no_unroll - uses code that does not unroll the loops in the 1701 block calculations (much slower) 1702 . -mat_block_size - size of the blocks to use 1703 1704 Level: intermediate 1705 1706 Notes: 1707 1708 Specify the preallocated storage with either nz or nnz (not both). 1709 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1710 allocation. For additional details, see the users manual chapter on 1711 matrices. 1712 1713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1714 @*/ 1715 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1716 { 1717 int ierr; 1718 1719 PetscFunctionBegin; 1720 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1721 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1722 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1728 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1729 { 1730 Mat C; 1731 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1732 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1733 1734 PetscFunctionBegin; 1735 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1736 1737 *B = 0; 1738 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1739 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1740 c = (Mat_SeqSBAIJ*)C->data; 1741 1742 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1743 C->preallocated = PETSC_TRUE; 1744 C->factor = A->factor; 1745 c->row = 0; 1746 c->icol = 0; 1747 c->saved_values = 0; 1748 c->keepzeroedrows = a->keepzeroedrows; 1749 C->assembled = PETSC_TRUE; 1750 1751 c->bs = a->bs; 1752 c->bs2 = a->bs2; 1753 c->mbs = a->mbs; 1754 c->nbs = a->nbs; 1755 1756 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1757 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1758 for (i=0; i<mbs; i++) { 1759 c->imax[i] = a->imax[i]; 1760 c->ilen[i] = a->ilen[i]; 1761 } 1762 1763 /* allocate the matrix space */ 1764 c->singlemalloc = PETSC_TRUE; 1765 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1766 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1767 c->j = (int*)(c->a + nz*bs2); 1768 c->i = c->j + nz; 1769 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1770 if (mbs > 0) { 1771 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1772 if (cpvalues == MAT_COPY_VALUES) { 1773 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1774 } else { 1775 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1776 } 1777 } 1778 1779 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1780 c->sorted = a->sorted; 1781 c->roworiented = a->roworiented; 1782 c->nonew = a->nonew; 1783 1784 if (a->diag) { 1785 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1786 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1787 for (i=0; i<mbs; i++) { 1788 c->diag[i] = a->diag[i]; 1789 } 1790 } else c->diag = 0; 1791 c->s_nz = a->s_nz; 1792 c->s_maxnz = a->s_maxnz; 1793 c->solve_work = 0; 1794 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1795 c->mult_work = 0; 1796 *B = C; 1797 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1798 PetscFunctionReturn(0); 1799 } 1800 1801 EXTERN_C_BEGIN 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1804 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1805 { 1806 Mat_SeqSBAIJ *a; 1807 Mat B; 1808 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1809 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1810 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1811 int *masked,nmask,tmp,bs2,ishift; 1812 PetscScalar *aa; 1813 MPI_Comm comm = ((PetscObject)viewer)->comm; 1814 1815 PetscFunctionBegin; 1816 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1817 bs2 = bs*bs; 1818 1819 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1820 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1821 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1822 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1823 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1824 M = header[1]; N = header[2]; nz = header[3]; 1825 1826 if (header[3] < 0) { 1827 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1828 } 1829 1830 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1831 1832 /* 1833 This code adds extra rows to make sure the number of rows is 1834 divisible by the blocksize 1835 */ 1836 mbs = M/bs; 1837 extra_rows = bs - M + bs*(mbs); 1838 if (extra_rows == bs) extra_rows = 0; 1839 else mbs++; 1840 if (extra_rows) { 1841 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1842 } 1843 1844 /* read in row lengths */ 1845 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1846 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1847 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1848 1849 /* read in column indices */ 1850 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1851 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1852 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1853 1854 /* loop over row lengths determining block row lengths */ 1855 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1856 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1857 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1858 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1859 masked = mask + mbs; 1860 rowcount = 0; nzcount = 0; 1861 for (i=0; i<mbs; i++) { 1862 nmask = 0; 1863 for (j=0; j<bs; j++) { 1864 kmax = rowlengths[rowcount]; 1865 for (k=0; k<kmax; k++) { 1866 tmp = jj[nzcount++]/bs; /* block col. index */ 1867 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1868 } 1869 rowcount++; 1870 } 1871 s_browlengths[i] += nmask; 1872 1873 /* zero out the mask elements we set */ 1874 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1875 } 1876 1877 /* create our matrix */ 1878 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1879 ierr = MatSetType(B,type);CHKERRQ(ierr); 1880 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1881 a = (Mat_SeqSBAIJ*)B->data; 1882 1883 /* set matrix "i" values */ 1884 a->i[0] = 0; 1885 for (i=1; i<= mbs; i++) { 1886 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1887 a->ilen[i-1] = s_browlengths[i-1]; 1888 } 1889 a->s_nz = a->i[mbs]; 1890 1891 /* read in nonzero values */ 1892 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1893 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1894 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1895 1896 /* set "a" and "j" values into matrix */ 1897 nzcount = 0; jcount = 0; 1898 for (i=0; i<mbs; i++) { 1899 nzcountb = nzcount; 1900 nmask = 0; 1901 for (j=0; j<bs; j++) { 1902 kmax = rowlengths[i*bs+j]; 1903 for (k=0; k<kmax; k++) { 1904 tmp = jj[nzcount++]/bs; /* block col. index */ 1905 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1906 } 1907 } 1908 /* sort the masked values */ 1909 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1910 1911 /* set "j" values into matrix */ 1912 maskcount = 1; 1913 for (j=0; j<nmask; j++) { 1914 a->j[jcount++] = masked[j]; 1915 mask[masked[j]] = maskcount++; 1916 } 1917 1918 /* set "a" values into matrix */ 1919 ishift = bs2*a->i[i]; 1920 for (j=0; j<bs; j++) { 1921 kmax = rowlengths[i*bs+j]; 1922 for (k=0; k<kmax; k++) { 1923 tmp = jj[nzcountb]/bs ; /* block col. index */ 1924 if (tmp >= i){ 1925 block = mask[tmp] - 1; 1926 point = jj[nzcountb] - bs*tmp; 1927 idx = ishift + bs2*block + j + bs*point; 1928 a->a[idx] = aa[nzcountb]; 1929 } 1930 nzcountb++; 1931 } 1932 } 1933 /* zero out the mask elements we set */ 1934 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1935 } 1936 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1937 1938 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1939 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1940 ierr = PetscFree(aa);CHKERRQ(ierr); 1941 ierr = PetscFree(jj);CHKERRQ(ierr); 1942 ierr = PetscFree(mask);CHKERRQ(ierr); 1943 1944 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1945 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1946 ierr = MatView_Private(B);CHKERRQ(ierr); 1947 *A = B; 1948 PetscFunctionReturn(0); 1949 } 1950 EXTERN_C_END 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1954 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1955 { 1956 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1957 MatScalar *aa=a->a,*v,*v1; 1958 PetscScalar *x,*b,*t,sum,d; 1959 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1960 int nz,nz1,*vj,*vj1,i; 1961 1962 PetscFunctionBegin; 1963 its = its*lits; 1964 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1965 1966 if (bs > 1) 1967 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1968 1969 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1970 if (xx != bb) { 1971 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1972 } else { 1973 b = x; 1974 } 1975 1976 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1977 1978 if (flag & SOR_ZERO_INITIAL_GUESS) { 1979 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1980 for (i=0; i<m; i++) 1981 t[i] = b[i]; 1982 1983 for (i=0; i<m; i++){ 1984 d = *(aa + ai[i]); /* diag[i] */ 1985 v = aa + ai[i] + 1; 1986 vj = aj + ai[i] + 1; 1987 nz = ai[i+1] - ai[i] - 1; 1988 x[i] = omega*t[i]/d; 1989 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1990 PetscLogFlops(2*nz-1); 1991 } 1992 } 1993 1994 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1995 for (i=0; i<m; i++) 1996 t[i] = b[i]; 1997 1998 for (i=0; i<m-1; i++){ /* update rhs */ 1999 v = aa + ai[i] + 1; 2000 vj = aj + ai[i] + 1; 2001 nz = ai[i+1] - ai[i] - 1; 2002 while (nz--) t[*vj++] -= x[i]*(*v++); 2003 PetscLogFlops(2*nz-1); 2004 } 2005 for (i=m-1; i>=0; i--){ 2006 d = *(aa + ai[i]); 2007 v = aa + ai[i] + 1; 2008 vj = aj + ai[i] + 1; 2009 nz = ai[i+1] - ai[i] - 1; 2010 sum = t[i]; 2011 while (nz--) sum -= x[*vj++]*(*v++); 2012 PetscLogFlops(2*nz-1); 2013 x[i] = (1-omega)*x[i] + omega*sum/d; 2014 } 2015 } 2016 its--; 2017 } 2018 2019 while (its--) { 2020 /* 2021 forward sweep: 2022 for i=0,...,m-1: 2023 sum[i] = (b[i] - U(i,:)x )/d[i]; 2024 x[i] = (1-omega)x[i] + omega*sum[i]; 2025 b = b - x[i]*U^T(i,:); 2026 2027 */ 2028 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2029 for (i=0; i<m; i++) 2030 t[i] = b[i]; 2031 2032 for (i=0; i<m; i++){ 2033 d = *(aa + ai[i]); /* diag[i] */ 2034 v = aa + ai[i] + 1; v1=v; 2035 vj = aj + ai[i] + 1; vj1=vj; 2036 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2037 sum = t[i]; 2038 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2039 x[i] = (1-omega)*x[i] + omega*sum/d; 2040 while (nz--) t[*vj++] -= x[i]*(*v++); 2041 PetscLogFlops(4*nz-2); 2042 } 2043 } 2044 2045 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2046 /* 2047 backward sweep: 2048 b = b - x[i]*U^T(i,:), i=0,...,n-2 2049 for i=m-1,...,0: 2050 sum[i] = (b[i] - U(i,:)x )/d[i]; 2051 x[i] = (1-omega)x[i] + omega*sum[i]; 2052 */ 2053 for (i=0; i<m; i++) 2054 t[i] = b[i]; 2055 2056 for (i=0; i<m-1; i++){ /* update rhs */ 2057 v = aa + ai[i] + 1; 2058 vj = aj + ai[i] + 1; 2059 nz = ai[i+1] - ai[i] - 1; 2060 while (nz--) t[*vj++] -= x[i]*(*v++); 2061 PetscLogFlops(2*nz-1); 2062 } 2063 for (i=m-1; i>=0; i--){ 2064 d = *(aa + ai[i]); 2065 v = aa + ai[i] + 1; 2066 vj = aj + ai[i] + 1; 2067 nz = ai[i+1] - ai[i] - 1; 2068 sum = t[i]; 2069 while (nz--) sum -= x[*vj++]*(*v++); 2070 PetscLogFlops(2*nz-1); 2071 x[i] = (1-omega)*x[i] + omega*sum/d; 2072 } 2073 } 2074 } 2075 2076 ierr = PetscFree(t); CHKERRQ(ierr); 2077 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2078 if (bb != xx) { 2079 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2080 } 2081 2082 PetscFunctionReturn(0); 2083 } 2084 2085 2086 2087 2088 2089 2090