1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 560 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 565 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 const MatScalar *value = v; 602 MatScalar *ap,*aa = a->a,*bap; 603 604 PetscFunctionBegin; 605 if (roworiented) { 606 stepval = (n-1)*bs; 607 } else { 608 stepval = (m-1)*bs; 609 } 610 for (k=0; k<m; k++) { /* loop over added rows */ 611 row = im[k]; 612 if (row < 0) continue; 613 #if defined(PETSC_USE_BOPT_g) 614 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 615 #endif 616 rp = aj + ai[row]; 617 ap = aa + bs2*ai[row]; 618 rmax = imax[row]; 619 nrow = ailen[row]; 620 low = 0; 621 for (l=0; l<n; l++) { /* loop over added columns */ 622 if (in[l] < 0) continue; 623 col = in[l]; 624 #if defined(PETSC_USE_BOPT_g) 625 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 626 #endif 627 if (col < row) continue; /* ignore lower triangular block */ 628 if (roworiented) { 629 value = v + k*(stepval+bs)*bs + l*bs; 630 } else { 631 value = v + l*(stepval+bs)*bs + k*bs; 632 } 633 if (!sorted) low = 0; high = nrow; 634 while (high-low > 7) { 635 t = (low+high)/2; 636 if (rp[t] > col) high = t; 637 else low = t; 638 } 639 for (i=low; i<high; i++) { 640 if (rp[i] > col) break; 641 if (rp[i] == col) { 642 bap = ap + bs2*i; 643 if (roworiented) { 644 if (is == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 bap[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 bap[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 if (is == ADD_VALUES) { 659 for (ii=0; ii<bs; ii++,value+=stepval) { 660 for (jj=0; jj<bs; jj++) { 661 *bap++ += *value++; 662 } 663 } 664 } else { 665 for (ii=0; ii<bs; ii++,value+=stepval) { 666 for (jj=0; jj<bs; jj++) { 667 *bap++ = *value++; 668 } 669 } 670 } 671 } 672 goto noinsert2; 673 } 674 } 675 if (nonew == 1) goto noinsert2; 676 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 677 if (nrow >= rmax) { 678 /* there is no extra room in row, therefore enlarge */ 679 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 680 MatScalar *new_a; 681 682 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 683 684 /* malloc new storage space */ 685 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 686 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 687 new_j = (int*)(new_a + bs2*new_nz); 688 new_i = new_j + new_nz; 689 690 /* copy over old data into new slots */ 691 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 692 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 693 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 694 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 695 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 698 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 699 /* free up old matrix storage */ 700 ierr = PetscFree(a->a);CHKERRQ(ierr); 701 if (!a->singlemalloc) { 702 ierr = PetscFree(a->i);CHKERRQ(ierr); 703 ierr = PetscFree(a->j);CHKERRQ(ierr); 704 } 705 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 706 a->singlemalloc = PETSC_TRUE; 707 708 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 709 rmax = imax[row] = imax[row] + CHUNKSIZE; 710 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 711 a->s_maxnz += bs2*CHUNKSIZE; 712 a->reallocs++; 713 a->s_nz++; 714 } 715 N = nrow++ - 1; 716 /* shift up all the later entries in this row */ 717 for (ii=N; ii>=i; ii--) { 718 rp[ii+1] = rp[ii]; 719 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 if (N >= i) { 722 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 rp[i] = col; 725 bap = ap + bs2*i; 726 if (roworiented) { 727 for (ii=0; ii<bs; ii++,value+=stepval) { 728 for (jj=ii; jj<bs2; jj+=bs) { 729 bap[jj] = *value++; 730 } 731 } 732 } else { 733 for (ii=0; ii<bs; ii++,value+=stepval) { 734 for (jj=0; jj<bs; jj++) { 735 *bap++ = *value++; 736 } 737 } 738 } 739 noinsert2:; 740 low = i; 741 } 742 ailen[row] = nrow; 743 } 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 750 { 751 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 752 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 753 int m = A->m,*ip,N,*ailen = a->ilen; 754 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 755 MatScalar *aa = a->a,*ap; 756 757 PetscFunctionBegin; 758 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 759 760 if (m) rmax = ailen[0]; 761 for (i=1; i<mbs; i++) { 762 /* move each row back by the amount of empty slots (fshift) before it*/ 763 fshift += imax[i-1] - ailen[i-1]; 764 rmax = PetscMax(rmax,ailen[i]); 765 if (fshift) { 766 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 767 N = ailen[i]; 768 for (j=0; j<N; j++) { 769 ip[j-fshift] = ip[j]; 770 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 771 } 772 } 773 ai[i] = ai[i-1] + ailen[i-1]; 774 } 775 if (mbs) { 776 fshift += imax[mbs-1] - ailen[mbs-1]; 777 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 778 } 779 /* reset ilen and imax for each row */ 780 for (i=0; i<mbs; i++) { 781 ailen[i] = imax[i] = ai[i+1] - ai[i]; 782 } 783 a->s_nz = ai[mbs]; 784 785 /* diagonals may have moved, reset it */ 786 if (a->diag) { 787 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 788 } 789 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 790 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 791 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 792 a->reallocs); 793 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 794 a->reallocs = 0; 795 A->info.nz_unneeded = (PetscReal)fshift*bs2; 796 797 PetscFunctionReturn(0); 798 } 799 800 /* 801 This function returns an array of flags which indicate the locations of contiguous 802 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 803 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 804 Assume: sizes should be long enough to hold all the values. 805 */ 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 808 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 809 { 810 int i,j,k,row; 811 PetscTruth flg; 812 813 PetscFunctionBegin; 814 for (i=0,j=0; i<n; j++) { 815 row = idx[i]; 816 if (row%bs!=0) { /* Not the begining of a block */ 817 sizes[j] = 1; 818 i++; 819 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 820 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 821 i++; 822 } else { /* Begining of the block, so check if the complete block exists */ 823 flg = PETSC_TRUE; 824 for (k=1; k<bs; k++) { 825 if (row+k != idx[i+k]) { /* break in the block */ 826 flg = PETSC_FALSE; 827 break; 828 } 829 } 830 if (flg == PETSC_TRUE) { /* No break in the bs */ 831 sizes[j] = bs; 832 i+= bs; 833 } else { 834 sizes[j] = 1; 835 i++; 836 } 837 } 838 } 839 *bs_max = j; 840 PetscFunctionReturn(0); 841 } 842 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 845 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 846 { 847 PetscFunctionBegin; 848 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 849 } 850 851 /* Only add/insert a(i,j) with i<=j (blocks). 852 Any a(i,j) with i>j input by user is ingored. 853 */ 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 857 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 858 { 859 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 860 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 861 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 862 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 863 int ridx,cidx,bs2=a->bs2,ierr; 864 MatScalar *ap,value,*aa=a->a,*bap; 865 866 PetscFunctionBegin; 867 868 for (k=0; k<m; k++) { /* loop over added rows */ 869 row = im[k]; /* row number */ 870 brow = row/bs; /* block row number */ 871 if (row < 0) continue; 872 #if defined(PETSC_USE_BOPT_g) 873 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 874 #endif 875 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 876 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 877 rmax = imax[brow]; /* maximum space allocated for this row */ 878 nrow = ailen[brow]; /* actual length of this row */ 879 low = 0; 880 881 for (l=0; l<n; l++) { /* loop over added columns */ 882 if (in[l] < 0) continue; 883 #if defined(PETSC_USE_BOPT_g) 884 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 885 #endif 886 col = in[l]; 887 bcol = col/bs; /* block col number */ 888 889 if (brow <= bcol){ 890 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 891 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 892 /* element value a(k,l) */ 893 if (roworiented) { 894 value = v[l + k*n]; 895 } else { 896 value = v[k + l*m]; 897 } 898 899 /* move pointer bap to a(k,l) quickly and add/insert value */ 900 if (!sorted) low = 0; high = nrow; 901 while (high-low > 7) { 902 t = (low+high)/2; 903 if (rp[t] > bcol) high = t; 904 else low = t; 905 } 906 for (i=low; i<high; i++) { 907 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 908 if (rp[i] > bcol) break; 909 if (rp[i] == bcol) { 910 bap = ap + bs2*i + bs*cidx + ridx; 911 if (is == ADD_VALUES) *bap += value; 912 else *bap = value; 913 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 914 if (brow == bcol && ridx < cidx){ 915 bap = ap + bs2*i + bs*ridx + cidx; 916 if (is == ADD_VALUES) *bap += value; 917 else *bap = value; 918 } 919 goto noinsert1; 920 } 921 } 922 923 if (nonew == 1) goto noinsert1; 924 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 925 if (nrow >= rmax) { 926 /* there is no extra room in row, therefore enlarge */ 927 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 928 MatScalar *new_a; 929 930 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 931 932 /* Malloc new storage space */ 933 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 934 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 935 new_j = (int*)(new_a + bs2*new_nz); 936 new_i = new_j + new_nz; 937 938 /* copy over old data into new slots */ 939 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 940 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 941 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 942 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 943 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 944 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 945 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 946 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 947 /* free up old matrix storage */ 948 ierr = PetscFree(a->a);CHKERRQ(ierr); 949 if (!a->singlemalloc) { 950 ierr = PetscFree(a->i);CHKERRQ(ierr); 951 ierr = PetscFree(a->j);CHKERRQ(ierr); 952 } 953 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 954 a->singlemalloc = PETSC_TRUE; 955 956 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 957 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 958 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 959 a->s_maxnz += bs2*CHUNKSIZE; 960 a->reallocs++; 961 a->s_nz++; 962 } 963 964 N = nrow++ - 1; 965 /* shift up all the later entries in this row */ 966 for (ii=N; ii>=i; ii--) { 967 rp[ii+1] = rp[ii]; 968 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 969 } 970 if (N>=i) { 971 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 972 } 973 rp[i] = bcol; 974 ap[bs2*i + bs*cidx + ridx] = value; 975 noinsert1:; 976 low = i; 977 /* } */ 978 } 979 } /* end of if .. if.. */ 980 } /* end of loop over added columns */ 981 ailen[brow] = nrow; 982 } /* end of loop over added rows */ 983 984 PetscFunctionReturn(0); 985 } 986 987 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 988 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 989 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 990 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 991 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 992 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 993 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 994 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 995 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 996 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 997 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 998 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 999 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1000 extern int MatZeroEntries_SeqSBAIJ(Mat); 1001 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1002 1003 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1004 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1005 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1006 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1007 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1008 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1009 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1010 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1011 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1012 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1013 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1014 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1015 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1016 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1017 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1018 1019 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1020 1021 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1022 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1023 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1024 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1025 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1026 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1027 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1028 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1029 1030 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1031 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1032 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1038 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1039 1040 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1041 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1042 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1043 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1044 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1045 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1046 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1047 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1048 1049 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1050 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1051 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1052 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1053 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1054 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1055 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1056 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1057 1058 #ifdef HAVE_MatICCFactor 1059 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1062 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1063 { 1064 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1065 Mat outA; 1066 int ierr; 1067 PetscTruth row_identity,col_identity; 1068 1069 PetscFunctionBegin; 1070 /* 1071 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1072 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1073 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1074 if (!row_identity || !col_identity) { 1075 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1076 } 1077 */ 1078 1079 outA = inA; 1080 inA->factor = FACTOR_CHOLESKY; 1081 1082 if (!a->diag) { 1083 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1084 } 1085 /* 1086 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1087 for ILU(0) factorization with natural ordering 1088 */ 1089 switch (a->bs) { 1090 case 1: 1091 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1092 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1093 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1094 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1095 case 2: 1096 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1097 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1098 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1099 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1100 break; 1101 case 3: 1102 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1103 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1104 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1105 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1106 break; 1107 case 4: 1108 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1109 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1110 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1111 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1112 break; 1113 case 5: 1114 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1115 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1116 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1117 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1118 break; 1119 case 6: 1120 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1121 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1122 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1123 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1124 break; 1125 case 7: 1126 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1127 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1128 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1129 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1130 break; 1131 default: 1132 a->row = row; 1133 a->icol = col; 1134 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1135 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1136 1137 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1138 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1139 PetscLogObjectParent(inA,a->icol); 1140 1141 if (!a->solve_work) { 1142 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1143 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1144 } 1145 } 1146 1147 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1148 1149 PetscFunctionReturn(0); 1150 } 1151 #endif 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1155 int MatPrintHelp_SeqSBAIJ(Mat A) 1156 { 1157 static PetscTruth called = PETSC_FALSE; 1158 MPI_Comm comm = A->comm; 1159 int ierr; 1160 1161 PetscFunctionBegin; 1162 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1163 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1164 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 EXTERN_C_BEGIN 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1171 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1172 { 1173 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1174 int i,nz,n; 1175 1176 PetscFunctionBegin; 1177 nz = baij->s_maxnz; 1178 n = mat->n; 1179 for (i=0; i<nz; i++) { 1180 baij->j[i] = indices[i]; 1181 } 1182 baij->s_nz = nz; 1183 for (i=0; i<n; i++) { 1184 baij->ilen[i] = baij->imax[i]; 1185 } 1186 1187 PetscFunctionReturn(0); 1188 } 1189 EXTERN_C_END 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1193 /*@ 1194 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1195 in the matrix. 1196 1197 Input Parameters: 1198 + mat - the SeqSBAIJ matrix 1199 - indices - the column indices 1200 1201 Level: advanced 1202 1203 Notes: 1204 This can be called if you have precomputed the nonzero structure of the 1205 matrix and want to provide it to the matrix object to improve the performance 1206 of the MatSetValues() operation. 1207 1208 You MUST have set the correct numbers of nonzeros per row in the call to 1209 MatCreateSeqSBAIJ(). 1210 1211 MUST be called before any calls to MatSetValues() 1212 1213 .seealso: MatCreateSeqSBAIJ 1214 @*/ 1215 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1216 { 1217 int ierr,(*f)(Mat,int *); 1218 1219 PetscFunctionBegin; 1220 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1221 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1222 if (f) { 1223 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1224 } else { 1225 SETERRQ(1,"Wrong type of matrix to set column indices"); 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1232 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1233 { 1234 int ierr; 1235 1236 PetscFunctionBegin; 1237 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1243 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1244 { 1245 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1246 PetscFunctionBegin; 1247 *array = a->a; 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1253 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1254 { 1255 PetscFunctionBegin; 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #include "petscblaslapack.h" 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1262 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1263 { 1264 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1265 int ierr,one=1,i,bs=y->bs,bs2,j; 1266 1267 PetscFunctionBegin; 1268 if (str == SAME_NONZERO_PATTERN) { 1269 BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1270 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1271 if (y->xtoy && y->XtoY != X) { 1272 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1273 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1274 } 1275 if (!y->xtoy) { /* get xtoy */ 1276 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1277 y->XtoY = X; 1278 } 1279 bs2 = bs*bs; 1280 for (i=0; i<x->s_nz; i++) { 1281 j = 0; 1282 while (j < bs2){ 1283 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1284 j++; 1285 } 1286 } 1287 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1288 } else { 1289 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1290 } 1291 PetscFunctionReturn(0); 1292 } 1293 1294 /* -------------------------------------------------------------------*/ 1295 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1296 MatGetRow_SeqSBAIJ, 1297 MatRestoreRow_SeqSBAIJ, 1298 MatMult_SeqSBAIJ_N, 1299 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1300 MatMultTranspose_SeqSBAIJ, 1301 MatMultTransposeAdd_SeqSBAIJ, 1302 MatSolve_SeqSBAIJ_N, 1303 0, 1304 0, 1305 /*10*/ 0, 1306 0, 1307 MatCholeskyFactor_SeqSBAIJ, 1308 MatRelax_SeqSBAIJ, 1309 MatTranspose_SeqSBAIJ, 1310 /*15*/ MatGetInfo_SeqSBAIJ, 1311 MatEqual_SeqSBAIJ, 1312 MatGetDiagonal_SeqSBAIJ, 1313 MatDiagonalScale_SeqSBAIJ, 1314 MatNorm_SeqSBAIJ, 1315 /*20*/ 0, 1316 MatAssemblyEnd_SeqSBAIJ, 1317 0, 1318 MatSetOption_SeqSBAIJ, 1319 MatZeroEntries_SeqSBAIJ, 1320 /*25*/ MatZeroRows_SeqSBAIJ, 1321 0, 1322 0, 1323 MatCholeskyFactorSymbolic_SeqSBAIJ, 1324 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1325 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1326 0, 1327 MatICCFactorSymbolic_SeqSBAIJ, 1328 MatGetArray_SeqSBAIJ, 1329 MatRestoreArray_SeqSBAIJ, 1330 /*35*/ MatDuplicate_SeqSBAIJ, 1331 0, 1332 0, 1333 0, 1334 0, 1335 /*40*/ MatAXPY_SeqSBAIJ, 1336 MatGetSubMatrices_SeqSBAIJ, 1337 MatIncreaseOverlap_SeqSBAIJ, 1338 MatGetValues_SeqSBAIJ, 1339 0, 1340 /*45*/ MatPrintHelp_SeqSBAIJ, 1341 MatScale_SeqSBAIJ, 1342 0, 1343 0, 1344 0, 1345 /*50*/ MatGetBlockSize_SeqSBAIJ, 1346 MatGetRowIJ_SeqSBAIJ, 1347 MatRestoreRowIJ_SeqSBAIJ, 1348 0, 1349 0, 1350 /*55*/ 0, 1351 0, 1352 0, 1353 0, 1354 MatSetValuesBlocked_SeqSBAIJ, 1355 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1356 0, 1357 0, 1358 MatGetPetscMaps_Petsc, 1359 0, 1360 /*65*/ 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 /*70*/ MatGetRowMax_SeqSBAIJ, 1366 0, 1367 0, 1368 0, 1369 0, 1370 /*75*/ 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 /*80*/ 0, 1376 0, 1377 0, 1378 0, 1379 #if !defined(PETSC_USE_COMPLEX) 1380 MatGetInertia_SeqSBAIJ, 1381 #else 1382 0, 1383 #endif 1384 /*85*/ MatLoad_SeqSBAIJ 1385 }; 1386 1387 EXTERN_C_BEGIN 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1390 int MatStoreValues_SeqSBAIJ(Mat mat) 1391 { 1392 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1393 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1394 int ierr; 1395 1396 PetscFunctionBegin; 1397 if (aij->nonew != 1) { 1398 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1399 } 1400 1401 /* allocate space for values if not already there */ 1402 if (!aij->saved_values) { 1403 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1404 } 1405 1406 /* copy values over */ 1407 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 EXTERN_C_END 1411 1412 EXTERN_C_BEGIN 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1415 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1416 { 1417 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1418 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1419 1420 PetscFunctionBegin; 1421 if (aij->nonew != 1) { 1422 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1423 } 1424 if (!aij->saved_values) { 1425 SETERRQ(1,"Must call MatStoreValues(A);first"); 1426 } 1427 1428 /* copy values over */ 1429 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 EXTERN_C_END 1433 1434 EXTERN_C_BEGIN 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1437 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1438 { 1439 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1440 int i,len,ierr,mbs,bs2; 1441 PetscTruth flg; 1442 int s_nz; 1443 1444 PetscFunctionBegin; 1445 B->preallocated = PETSC_TRUE; 1446 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1447 mbs = B->m/bs; 1448 bs2 = bs*bs; 1449 1450 if (mbs*bs != B->m) { 1451 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1452 } 1453 1454 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1455 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1456 if (nnz) { 1457 for (i=0; i<mbs; i++) { 1458 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1459 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1460 } 1461 } 1462 1463 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1464 if (!flg) { 1465 switch (bs) { 1466 case 1: 1467 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1468 B->ops->solve = MatSolve_SeqSBAIJ_1; 1469 B->ops->solves = MatSolves_SeqSBAIJ_1; 1470 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1471 B->ops->mult = MatMult_SeqSBAIJ_1; 1472 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1473 break; 1474 case 2: 1475 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1476 B->ops->solve = MatSolve_SeqSBAIJ_2; 1477 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1478 B->ops->mult = MatMult_SeqSBAIJ_2; 1479 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1480 break; 1481 case 3: 1482 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1483 B->ops->solve = MatSolve_SeqSBAIJ_3; 1484 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1485 B->ops->mult = MatMult_SeqSBAIJ_3; 1486 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1487 break; 1488 case 4: 1489 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1490 B->ops->solve = MatSolve_SeqSBAIJ_4; 1491 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1492 B->ops->mult = MatMult_SeqSBAIJ_4; 1493 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1494 break; 1495 case 5: 1496 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1497 B->ops->solve = MatSolve_SeqSBAIJ_5; 1498 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1499 B->ops->mult = MatMult_SeqSBAIJ_5; 1500 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1501 break; 1502 case 6: 1503 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1504 B->ops->solve = MatSolve_SeqSBAIJ_6; 1505 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1506 B->ops->mult = MatMult_SeqSBAIJ_6; 1507 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1508 break; 1509 case 7: 1510 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1511 B->ops->solve = MatSolve_SeqSBAIJ_7; 1512 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1513 B->ops->mult = MatMult_SeqSBAIJ_7; 1514 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1515 break; 1516 } 1517 } 1518 1519 b->mbs = mbs; 1520 b->nbs = mbs; 1521 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1522 if (!nnz) { 1523 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1524 else if (nz <= 0) nz = 1; 1525 for (i=0; i<mbs; i++) { 1526 b->imax[i] = nz; 1527 } 1528 nz = nz*mbs; /* total nz */ 1529 } else { 1530 nz = 0; 1531 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1532 } 1533 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1534 s_nz = nz; 1535 1536 /* allocate the matrix space */ 1537 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1538 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1539 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1540 b->j = (int*)(b->a + s_nz*bs2); 1541 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1542 b->i = b->j + s_nz; 1543 b->singlemalloc = PETSC_TRUE; 1544 1545 /* pointer to beginning of each row */ 1546 b->i[0] = 0; 1547 for (i=1; i<mbs+1; i++) { 1548 b->i[i] = b->i[i-1] + b->imax[i-1]; 1549 } 1550 1551 /* b->ilen will count nonzeros in each block row so far. */ 1552 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1553 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1554 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1555 1556 b->bs = bs; 1557 b->bs2 = bs2; 1558 b->s_nz = 0; 1559 b->s_maxnz = s_nz*bs2; 1560 1561 b->inew = 0; 1562 b->jnew = 0; 1563 b->anew = 0; 1564 b->a2anew = 0; 1565 b->permute = PETSC_FALSE; 1566 PetscFunctionReturn(0); 1567 } 1568 EXTERN_C_END 1569 1570 /*MC 1571 MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1572 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1573 1574 Options Database Keys: 1575 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1576 1577 Level: beginner 1578 1579 .seealso: MatCreateSeqSBAIJ 1580 M*/ 1581 1582 EXTERN_C_BEGIN 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1585 int MatCreate_SeqSBAIJ(Mat B) 1586 { 1587 Mat_SeqSBAIJ *b; 1588 int ierr,size; 1589 1590 PetscFunctionBegin; 1591 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1592 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1593 B->m = B->M = PetscMax(B->m,B->M); 1594 B->n = B->N = PetscMax(B->n,B->N); 1595 1596 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1597 B->data = (void*)b; 1598 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1599 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1600 B->ops->destroy = MatDestroy_SeqSBAIJ; 1601 B->ops->view = MatView_SeqSBAIJ; 1602 B->factor = 0; 1603 B->lupivotthreshold = 1.0; 1604 B->mapping = 0; 1605 b->row = 0; 1606 b->icol = 0; 1607 b->reallocs = 0; 1608 b->saved_values = 0; 1609 1610 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1611 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1612 1613 b->sorted = PETSC_FALSE; 1614 b->roworiented = PETSC_TRUE; 1615 b->nonew = 0; 1616 b->diag = 0; 1617 b->solve_work = 0; 1618 b->mult_work = 0; 1619 B->spptr = 0; 1620 b->keepzeroedrows = PETSC_FALSE; 1621 b->xtoy = 0; 1622 b->XtoY = 0; 1623 1624 b->inew = 0; 1625 b->jnew = 0; 1626 b->anew = 0; 1627 b->a2anew = 0; 1628 b->permute = PETSC_FALSE; 1629 1630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1631 "MatStoreValues_SeqSBAIJ", 1632 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1634 "MatRetrieveValues_SeqSBAIJ", 1635 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1637 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1638 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1640 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1641 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 EXTERN_C_END 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1648 /*@C 1649 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1650 compressed row) format. For good matrix assembly performance the 1651 user should preallocate the matrix storage by setting the parameter nz 1652 (or the array nnz). By setting these parameters accurately, performance 1653 during matrix assembly can be increased by more than a factor of 50. 1654 1655 Collective on Mat 1656 1657 Input Parameters: 1658 + A - the symmetric matrix 1659 . bs - size of block 1660 . nz - number of block nonzeros per block row (same for all rows) 1661 - nnz - array containing the number of block nonzeros in the upper triangular plus 1662 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1663 1664 Options Database Keys: 1665 . -mat_no_unroll - uses code that does not unroll the loops in the 1666 block calculations (much slower) 1667 . -mat_block_size - size of the blocks to use 1668 1669 Level: intermediate 1670 1671 Notes: 1672 Specify the preallocated storage with either nz or nnz (not both). 1673 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1674 allocation. For additional details, see the users manual chapter on 1675 matrices. 1676 1677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1678 @*/ 1679 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1680 int ierr,(*f)(Mat,int,int,const int[]); 1681 1682 PetscFunctionBegin; 1683 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1684 if (f) { 1685 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatCreateSeqSBAIJ" 1692 /*@C 1693 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1694 compressed row) format. For good matrix assembly performance the 1695 user should preallocate the matrix storage by setting the parameter nz 1696 (or the array nnz). By setting these parameters accurately, performance 1697 during matrix assembly can be increased by more than a factor of 50. 1698 1699 Collective on MPI_Comm 1700 1701 Input Parameters: 1702 + comm - MPI communicator, set to PETSC_COMM_SELF 1703 . bs - size of block 1704 . m - number of rows, or number of columns 1705 . nz - number of block nonzeros per block row (same for all rows) 1706 - nnz - array containing the number of block nonzeros in the upper triangular plus 1707 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1708 1709 Output Parameter: 1710 . A - the symmetric matrix 1711 1712 Options Database Keys: 1713 . -mat_no_unroll - uses code that does not unroll the loops in the 1714 block calculations (much slower) 1715 . -mat_block_size - size of the blocks to use 1716 1717 Level: intermediate 1718 1719 Notes: 1720 1721 Specify the preallocated storage with either nz or nnz (not both). 1722 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1723 allocation. For additional details, see the users manual chapter on 1724 matrices. 1725 1726 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1727 @*/ 1728 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1729 { 1730 int ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1734 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1735 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1741 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1742 { 1743 Mat C; 1744 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1745 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1746 1747 PetscFunctionBegin; 1748 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1749 1750 *B = 0; 1751 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1752 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1753 c = (Mat_SeqSBAIJ*)C->data; 1754 1755 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1756 C->preallocated = PETSC_TRUE; 1757 C->factor = A->factor; 1758 c->row = 0; 1759 c->icol = 0; 1760 c->saved_values = 0; 1761 c->keepzeroedrows = a->keepzeroedrows; 1762 C->assembled = PETSC_TRUE; 1763 1764 c->bs = a->bs; 1765 c->bs2 = a->bs2; 1766 c->mbs = a->mbs; 1767 c->nbs = a->nbs; 1768 1769 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1770 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1771 for (i=0; i<mbs; i++) { 1772 c->imax[i] = a->imax[i]; 1773 c->ilen[i] = a->ilen[i]; 1774 } 1775 1776 /* allocate the matrix space */ 1777 c->singlemalloc = PETSC_TRUE; 1778 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1779 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1780 c->j = (int*)(c->a + nz*bs2); 1781 c->i = c->j + nz; 1782 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1783 if (mbs > 0) { 1784 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1785 if (cpvalues == MAT_COPY_VALUES) { 1786 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1787 } else { 1788 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1789 } 1790 } 1791 1792 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1793 c->sorted = a->sorted; 1794 c->roworiented = a->roworiented; 1795 c->nonew = a->nonew; 1796 1797 if (a->diag) { 1798 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1799 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1800 for (i=0; i<mbs; i++) { 1801 c->diag[i] = a->diag[i]; 1802 } 1803 } else c->diag = 0; 1804 c->s_nz = a->s_nz; 1805 c->s_maxnz = a->s_maxnz; 1806 c->solve_work = 0; 1807 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1808 c->mult_work = 0; 1809 *B = C; 1810 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1816 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1817 { 1818 Mat_SeqSBAIJ *a; 1819 Mat B; 1820 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1821 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1822 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1823 int *masked,nmask,tmp,bs2,ishift; 1824 PetscScalar *aa; 1825 MPI_Comm comm = ((PetscObject)viewer)->comm; 1826 1827 PetscFunctionBegin; 1828 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1829 bs2 = bs*bs; 1830 1831 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1832 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1833 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1834 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1835 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1836 M = header[1]; N = header[2]; nz = header[3]; 1837 1838 if (header[3] < 0) { 1839 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1840 } 1841 1842 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1843 1844 /* 1845 This code adds extra rows to make sure the number of rows is 1846 divisible by the blocksize 1847 */ 1848 mbs = M/bs; 1849 extra_rows = bs - M + bs*(mbs); 1850 if (extra_rows == bs) extra_rows = 0; 1851 else mbs++; 1852 if (extra_rows) { 1853 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1854 } 1855 1856 /* read in row lengths */ 1857 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1858 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1859 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1860 1861 /* read in column indices */ 1862 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1863 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1864 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1865 1866 /* loop over row lengths determining block row lengths */ 1867 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1868 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1869 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1870 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1871 masked = mask + mbs; 1872 rowcount = 0; nzcount = 0; 1873 for (i=0; i<mbs; i++) { 1874 nmask = 0; 1875 for (j=0; j<bs; j++) { 1876 kmax = rowlengths[rowcount]; 1877 for (k=0; k<kmax; k++) { 1878 tmp = jj[nzcount++]/bs; /* block col. index */ 1879 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1880 } 1881 rowcount++; 1882 } 1883 s_browlengths[i] += nmask; 1884 1885 /* zero out the mask elements we set */ 1886 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1887 } 1888 1889 /* create our matrix */ 1890 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1891 ierr = MatSetType(B,type);CHKERRQ(ierr); 1892 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1893 a = (Mat_SeqSBAIJ*)B->data; 1894 1895 /* set matrix "i" values */ 1896 a->i[0] = 0; 1897 for (i=1; i<= mbs; i++) { 1898 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1899 a->ilen[i-1] = s_browlengths[i-1]; 1900 } 1901 a->s_nz = a->i[mbs]; 1902 1903 /* read in nonzero values */ 1904 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1905 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1906 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1907 1908 /* set "a" and "j" values into matrix */ 1909 nzcount = 0; jcount = 0; 1910 for (i=0; i<mbs; i++) { 1911 nzcountb = nzcount; 1912 nmask = 0; 1913 for (j=0; j<bs; j++) { 1914 kmax = rowlengths[i*bs+j]; 1915 for (k=0; k<kmax; k++) { 1916 tmp = jj[nzcount++]/bs; /* block col. index */ 1917 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1918 } 1919 } 1920 /* sort the masked values */ 1921 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1922 1923 /* set "j" values into matrix */ 1924 maskcount = 1; 1925 for (j=0; j<nmask; j++) { 1926 a->j[jcount++] = masked[j]; 1927 mask[masked[j]] = maskcount++; 1928 } 1929 1930 /* set "a" values into matrix */ 1931 ishift = bs2*a->i[i]; 1932 for (j=0; j<bs; j++) { 1933 kmax = rowlengths[i*bs+j]; 1934 for (k=0; k<kmax; k++) { 1935 tmp = jj[nzcountb]/bs ; /* block col. index */ 1936 if (tmp >= i){ 1937 block = mask[tmp] - 1; 1938 point = jj[nzcountb] - bs*tmp; 1939 idx = ishift + bs2*block + j + bs*point; 1940 a->a[idx] = aa[nzcountb]; 1941 } 1942 nzcountb++; 1943 } 1944 } 1945 /* zero out the mask elements we set */ 1946 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1947 } 1948 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1949 1950 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1952 ierr = PetscFree(aa);CHKERRQ(ierr); 1953 ierr = PetscFree(jj);CHKERRQ(ierr); 1954 ierr = PetscFree(mask);CHKERRQ(ierr); 1955 1956 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatView_Private(B);CHKERRQ(ierr); 1959 *A = B; 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1965 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1966 { 1967 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1968 MatScalar *aa=a->a,*v,*v1; 1969 PetscScalar *x,*b,*t,sum,d; 1970 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1971 int nz,nz1,*vj,*vj1,i; 1972 1973 PetscFunctionBegin; 1974 its = its*lits; 1975 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1976 1977 if (bs > 1) 1978 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1979 1980 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1981 if (xx != bb) { 1982 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1983 } else { 1984 b = x; 1985 } 1986 1987 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1988 1989 if (flag & SOR_ZERO_INITIAL_GUESS) { 1990 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1991 for (i=0; i<m; i++) 1992 t[i] = b[i]; 1993 1994 for (i=0; i<m; i++){ 1995 d = *(aa + ai[i]); /* diag[i] */ 1996 v = aa + ai[i] + 1; 1997 vj = aj + ai[i] + 1; 1998 nz = ai[i+1] - ai[i] - 1; 1999 x[i] = omega*t[i]/d; 2000 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2001 PetscLogFlops(2*nz-1); 2002 } 2003 } 2004 2005 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2006 for (i=0; i<m; i++) 2007 t[i] = b[i]; 2008 2009 for (i=0; i<m-1; i++){ /* update rhs */ 2010 v = aa + ai[i] + 1; 2011 vj = aj + ai[i] + 1; 2012 nz = ai[i+1] - ai[i] - 1; 2013 while (nz--) t[*vj++] -= x[i]*(*v++); 2014 PetscLogFlops(2*nz-1); 2015 } 2016 for (i=m-1; i>=0; i--){ 2017 d = *(aa + ai[i]); 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 sum = t[i]; 2022 while (nz--) sum -= x[*vj++]*(*v++); 2023 PetscLogFlops(2*nz-1); 2024 x[i] = (1-omega)*x[i] + omega*sum/d; 2025 } 2026 } 2027 its--; 2028 } 2029 2030 while (its--) { 2031 /* 2032 forward sweep: 2033 for i=0,...,m-1: 2034 sum[i] = (b[i] - U(i,:)x )/d[i]; 2035 x[i] = (1-omega)x[i] + omega*sum[i]; 2036 b = b - x[i]*U^T(i,:); 2037 2038 */ 2039 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2040 for (i=0; i<m; i++) 2041 t[i] = b[i]; 2042 2043 for (i=0; i<m; i++){ 2044 d = *(aa + ai[i]); /* diag[i] */ 2045 v = aa + ai[i] + 1; v1=v; 2046 vj = aj + ai[i] + 1; vj1=vj; 2047 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2048 sum = t[i]; 2049 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2050 x[i] = (1-omega)*x[i] + omega*sum/d; 2051 while (nz--) t[*vj++] -= x[i]*(*v++); 2052 PetscLogFlops(4*nz-2); 2053 } 2054 } 2055 2056 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2057 /* 2058 backward sweep: 2059 b = b - x[i]*U^T(i,:), i=0,...,n-2 2060 for i=m-1,...,0: 2061 sum[i] = (b[i] - U(i,:)x )/d[i]; 2062 x[i] = (1-omega)x[i] + omega*sum[i]; 2063 */ 2064 for (i=0; i<m; i++) 2065 t[i] = b[i]; 2066 2067 for (i=0; i<m-1; i++){ /* update rhs */ 2068 v = aa + ai[i] + 1; 2069 vj = aj + ai[i] + 1; 2070 nz = ai[i+1] - ai[i] - 1; 2071 while (nz--) t[*vj++] -= x[i]*(*v++); 2072 PetscLogFlops(2*nz-1); 2073 } 2074 for (i=m-1; i>=0; i--){ 2075 d = *(aa + ai[i]); 2076 v = aa + ai[i] + 1; 2077 vj = aj + ai[i] + 1; 2078 nz = ai[i+1] - ai[i] - 1; 2079 sum = t[i]; 2080 while (nz--) sum -= x[*vj++]*(*v++); 2081 PetscLogFlops(2*nz-1); 2082 x[i] = (1-omega)*x[i] + omega*sum/d; 2083 } 2084 } 2085 } 2086 2087 ierr = PetscFree(t); CHKERRQ(ierr); 2088 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2089 if (bb != xx) { 2090 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2091 } 2092 2093 PetscFunctionReturn(0); 2094 } 2095 2096 2097 2098 2099 2100 2101