1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "../src/mat/impls/sbaij/seq/sbaij.h" 9 10 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 *missing = PETSC_FALSE; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 *missing = PETSC_TRUE; 31 if (dd) *dd = i; 32 break; 33 } 34 } 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 41 { 42 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 46 PetscFunctionBegin; 47 if (!a->diag) { 48 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 49 } 50 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 *nn = n; 64 if (!ia) PetscFunctionReturn(0); 65 if (!blockcompressed) { 66 /* malloc & create the natural set of indices */ 67 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 68 for (i=0; i<n+1; i++) { 69 for (j=0; j<bs; j++) { 70 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 71 } 72 } 73 for (i=0; i<nz; i++) { 74 for (j=0; j<bs; j++) { 75 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 76 } 77 } 78 } else { /* blockcompressed */ 79 if (oshift == 1) { 80 /* temporarily add 1 to i and j indices */ 81 for (i=0; i<nz; i++) a->j[i]++; 82 for (i=0; i<n+1; i++) a->i[i]++; 83 } 84 *ia = a->i; *ja = a->j; 85 } 86 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscInt i,n = a->mbs,nz = a->i[n]; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 if (!ia) PetscFunctionReturn(0); 100 101 if (!blockcompressed) { 102 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 103 } else if (oshift == 1) { /* blockcompressed */ 104 for (i=0; i<nz; i++) a->j[i]--; 105 for (i=0; i<n+1; i++) a->i[i]--; 106 } 107 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 #if defined(PETSC_USE_LOG) 120 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap->N,a->nz); 121 #endif 122 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 123 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 124 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 127 if (a->inode.size) {ierr = PetscFree(a->inode.size);CHKERRQ(ierr);} 128 ierr = PetscFree(a->diag);CHKERRQ(ierr); 129 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 130 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 131 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 132 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 133 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 134 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 135 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 136 ierr = PetscFree(a->jshort);CHKERRQ(ierr); 137 ierr = PetscFree(a->inew);CHKERRQ(ierr); 138 ierr = PetscFree(a);CHKERRQ(ierr); 139 140 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 144 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 145 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 152 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 153 { 154 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 155 PetscErrorCode ierr; 156 157 PetscFunctionBegin; 158 switch (op) { 159 case MAT_ROW_ORIENTED: 160 a->roworiented = flg; 161 break; 162 case MAT_KEEP_NONZERO_PATTERN: 163 a->keepnonzeropattern = flg; 164 break; 165 case MAT_NEW_NONZERO_LOCATIONS: 166 a->nonew = (flg ? 0 : 1); 167 break; 168 case MAT_NEW_NONZERO_LOCATION_ERR: 169 a->nonew = (flg ? -1 : 0); 170 break; 171 case MAT_NEW_NONZERO_ALLOCATION_ERR: 172 a->nonew = (flg ? -2 : 0); 173 break; 174 case MAT_UNUSED_NONZERO_LOCATION_ERR: 175 a->nounused = (flg ? -1 : 0); 176 break; 177 case MAT_NEW_DIAGONALS: 178 case MAT_IGNORE_OFF_PROC_ENTRIES: 179 case MAT_USE_HASH_TABLE: 180 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 181 break; 182 case MAT_HERMITIAN: 183 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 184 case MAT_SYMMETRIC: 185 case MAT_STRUCTURALLY_SYMMETRIC: 186 case MAT_SYMMETRY_ETERNAL: 187 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 188 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 189 break; 190 case MAT_IGNORE_LOWER_TRIANGULAR: 191 a->ignore_ltriangular = flg; 192 break; 193 case MAT_ERROR_LOWER_TRIANGULAR: 194 a->ignore_ltriangular = flg; 195 break; 196 case MAT_GETROW_UPPERTRIANGULAR: 197 a->getrow_utriangular = flg; 198 break; 199 default: 200 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 207 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 208 { 209 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 210 PetscErrorCode ierr; 211 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 212 MatScalar *aa,*aa_i; 213 PetscScalar *v_i; 214 215 PetscFunctionBegin; 216 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 217 /* Get the upper triangular part of the row */ 218 bs = A->rmap->bs; 219 ai = a->i; 220 aj = a->j; 221 aa = a->a; 222 bs2 = a->bs2; 223 224 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 225 226 bn = row/bs; /* Block number */ 227 bp = row % bs; /* Block position */ 228 M = ai[bn+1] - ai[bn]; 229 *ncols = bs*M; 230 231 if (v) { 232 *v = 0; 233 if (*ncols) { 234 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 235 for (i=0; i<M; i++) { /* for each block in the block row */ 236 v_i = *v + i*bs; 237 aa_i = aa + bs2*(ai[bn] + i); 238 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 239 } 240 } 241 } 242 243 if (cols) { 244 *cols = 0; 245 if (*ncols) { 246 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 247 for (i=0; i<M; i++) { /* for each block in the block row */ 248 cols_i = *cols + i*bs; 249 itmp = bs*aj[ai[bn] + i]; 250 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 251 } 252 } 253 } 254 255 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 256 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 257 #ifdef column_search 258 v_i = *v + M*bs; 259 cols_i = *cols + M*bs; 260 for (i=0; i<bn; i++){ /* for each block row */ 261 M = ai[i+1] - ai[i]; 262 for (j=0; j<M; j++){ 263 itmp = aj[ai[i] + j]; /* block column value */ 264 if (itmp == bn){ 265 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 266 for (k=0; k<bs; k++) { 267 *cols_i++ = i*bs+k; 268 *v_i++ = aa_i[k]; 269 } 270 *ncols += bs; 271 break; 272 } 273 } 274 } 275 #endif 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 281 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 282 { 283 PetscErrorCode ierr; 284 285 PetscFunctionBegin; 286 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 287 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 293 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 294 { 295 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 296 297 PetscFunctionBegin; 298 a->getrow_utriangular = PETSC_TRUE; 299 PetscFunctionReturn(0); 300 } 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 303 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 304 { 305 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 306 307 PetscFunctionBegin; 308 a->getrow_utriangular = PETSC_FALSE; 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 314 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B) 315 { 316 PetscErrorCode ierr; 317 PetscFunctionBegin; 318 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 319 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 326 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 327 { 328 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 329 PetscErrorCode ierr; 330 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 331 const char *name; 332 PetscViewerFormat format; 333 334 PetscFunctionBegin; 335 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 336 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 337 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 338 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 339 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 340 Mat aij; 341 342 if (A->factor && bs>1){ 343 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 347 ierr = MatView(aij,viewer);CHKERRQ(ierr); 348 ierr = MatDestroy(aij);CHKERRQ(ierr); 349 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 350 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 351 for (i=0; i<a->mbs; i++) { 352 for (j=0; j<bs; j++) { 353 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 354 for (k=a->i[i]; k<a->i[i+1]; k++) { 355 for (l=0; l<bs; l++) { 356 #if defined(PETSC_USE_COMPLEX) 357 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 358 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 359 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 360 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 361 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 362 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 363 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 364 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 365 } 366 #else 367 if (a->a[bs2*k + l*bs + j] != 0.0) { 368 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 369 } 370 #endif 371 } 372 } 373 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 374 } 375 } 376 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 377 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 378 PetscFunctionReturn(0); 379 } else { 380 if (A->factor && bs>1){ 381 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 382 } 383 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 384 for (i=0; i<a->mbs; i++) { 385 for (j=0; j<bs; j++) { 386 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 387 for (k=a->i[i]; k<a->i[i+1]; k++) { 388 for (l=0; l<bs; l++) { 389 #if defined(PETSC_USE_COMPLEX) 390 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 391 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 392 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 393 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 394 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 395 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 396 } else { 397 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 398 } 399 #else 400 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 401 #endif 402 } 403 } 404 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 408 } 409 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 415 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 416 { 417 Mat A = (Mat) Aa; 418 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 419 PetscErrorCode ierr; 420 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 421 PetscMPIInt rank; 422 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 423 MatScalar *aa; 424 MPI_Comm comm; 425 PetscViewer viewer; 426 427 PetscFunctionBegin; 428 /* 429 This is nasty. If this is called from an originally parallel matrix 430 then all processes call this,but only the first has the matrix so the 431 rest should return immediately. 432 */ 433 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 434 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 435 if (rank) PetscFunctionReturn(0); 436 437 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 438 439 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 440 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 441 442 /* loop over matrix elements drawing boxes */ 443 color = PETSC_DRAW_BLUE; 444 for (i=0,row=0; i<mbs; i++,row+=bs) { 445 for (j=a->i[i]; j<a->i[i+1]; j++) { 446 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 447 x_l = a->j[j]*bs; x_r = x_l + 1.0; 448 aa = a->a + j*bs2; 449 for (k=0; k<bs; k++) { 450 for (l=0; l<bs; l++) { 451 if (PetscRealPart(*aa++) >= 0.) continue; 452 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 453 } 454 } 455 } 456 } 457 color = PETSC_DRAW_CYAN; 458 for (i=0,row=0; i<mbs; i++,row+=bs) { 459 for (j=a->i[i]; j<a->i[i+1]; j++) { 460 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 461 x_l = a->j[j]*bs; x_r = x_l + 1.0; 462 aa = a->a + j*bs2; 463 for (k=0; k<bs; k++) { 464 for (l=0; l<bs; l++) { 465 if (PetscRealPart(*aa++) != 0.) continue; 466 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 467 } 468 } 469 } 470 } 471 472 color = PETSC_DRAW_RED; 473 for (i=0,row=0; i<mbs; i++,row+=bs) { 474 for (j=a->i[i]; j<a->i[i+1]; j++) { 475 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 476 x_l = a->j[j]*bs; x_r = x_l + 1.0; 477 aa = a->a + j*bs2; 478 for (k=0; k<bs; k++) { 479 for (l=0; l<bs; l++) { 480 if (PetscRealPart(*aa++) <= 0.) continue; 481 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 482 } 483 } 484 } 485 } 486 PetscFunctionReturn(0); 487 } 488 489 #undef __FUNCT__ 490 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 491 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 492 { 493 PetscErrorCode ierr; 494 PetscReal xl,yl,xr,yr,w,h; 495 PetscDraw draw; 496 PetscTruth isnull; 497 498 PetscFunctionBegin; 499 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 500 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 501 502 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 503 xr = A->rmap->N; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 504 xr += w; yr += h; xl = -w; yl = -h; 505 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 506 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 507 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatView_SeqSBAIJ" 513 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 514 { 515 PetscErrorCode ierr; 516 PetscTruth iascii,isdraw; 517 518 PetscFunctionBegin; 519 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 520 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 521 if (iascii){ 522 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 523 } else if (isdraw) { 524 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 525 } else { 526 Mat B; 527 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 528 ierr = MatView(B,viewer);CHKERRQ(ierr); 529 ierr = MatDestroy(B);CHKERRQ(ierr); 530 } 531 PetscFunctionReturn(0); 532 } 533 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 537 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 538 { 539 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 540 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 541 PetscInt *ai = a->i,*ailen = a->ilen; 542 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 543 MatScalar *ap,*aa = a->a; 544 545 PetscFunctionBegin; 546 for (k=0; k<m; k++) { /* loop over rows */ 547 row = im[k]; brow = row/bs; 548 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 549 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 550 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 551 nrow = ailen[brow]; 552 for (l=0; l<n; l++) { /* loop over columns */ 553 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 554 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 555 col = in[l] ; 556 bcol = col/bs; 557 cidx = col%bs; 558 ridx = row%bs; 559 high = nrow; 560 low = 0; /* assume unsorted */ 561 while (high-low > 5) { 562 t = (low+high)/2; 563 if (rp[t] > bcol) high = t; 564 else low = t; 565 } 566 for (i=low; i<high; i++) { 567 if (rp[i] > bcol) break; 568 if (rp[i] == bcol) { 569 *v++ = ap[bs2*i+bs*cidx+ridx]; 570 goto finished; 571 } 572 } 573 *v++ = 0.0; 574 finished:; 575 } 576 } 577 PetscFunctionReturn(0); 578 } 579 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 583 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 584 { 585 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 586 PetscErrorCode ierr; 587 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 588 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 589 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 590 PetscTruth roworiented=a->roworiented; 591 const PetscScalar *value = v; 592 MatScalar *ap,*aa = a->a,*bap; 593 594 PetscFunctionBegin; 595 if (roworiented) { 596 stepval = (n-1)*bs; 597 } else { 598 stepval = (m-1)*bs; 599 } 600 for (k=0; k<m; k++) { /* loop over added rows */ 601 row = im[k]; 602 if (row < 0) continue; 603 #if defined(PETSC_USE_DEBUG) 604 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 605 #endif 606 rp = aj + ai[row]; 607 ap = aa + bs2*ai[row]; 608 rmax = imax[row]; 609 nrow = ailen[row]; 610 low = 0; 611 high = nrow; 612 for (l=0; l<n; l++) { /* loop over added columns */ 613 if (in[l] < 0) continue; 614 col = in[l]; 615 #if defined(PETSC_USE_DEBUG) 616 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 617 #endif 618 if (col < row) continue; /* ignore lower triangular block */ 619 if (roworiented) { 620 value = v + k*(stepval+bs)*bs + l*bs; 621 } else { 622 value = v + l*(stepval+bs)*bs + k*bs; 623 } 624 if (col <= lastcol) low = 0; else high = nrow; 625 lastcol = col; 626 while (high-low > 7) { 627 t = (low+high)/2; 628 if (rp[t] > col) high = t; 629 else low = t; 630 } 631 for (i=low; i<high; i++) { 632 if (rp[i] > col) break; 633 if (rp[i] == col) { 634 bap = ap + bs2*i; 635 if (roworiented) { 636 if (is == ADD_VALUES) { 637 for (ii=0; ii<bs; ii++,value+=stepval) { 638 for (jj=ii; jj<bs2; jj+=bs) { 639 bap[jj] += *value++; 640 } 641 } 642 } else { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] = *value++; 646 } 647 } 648 } 649 } else { 650 if (is == ADD_VALUES) { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=0; jj<bs; jj++) { 653 *bap++ += *value++; 654 } 655 } 656 } else { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ = *value++; 660 } 661 } 662 } 663 } 664 goto noinsert2; 665 } 666 } 667 if (nonew == 1) goto noinsert2; 668 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 669 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 670 N = nrow++ - 1; high++; 671 /* shift up all the later entries in this row */ 672 for (ii=N; ii>=i; ii--) { 673 rp[ii+1] = rp[ii]; 674 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 675 } 676 if (N >= i) { 677 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 678 } 679 rp[i] = col; 680 bap = ap + bs2*i; 681 if (roworiented) { 682 for (ii=0; ii<bs; ii++,value+=stepval) { 683 for (jj=ii; jj<bs2; jj+=bs) { 684 bap[jj] = *value++; 685 } 686 } 687 } else { 688 for (ii=0; ii<bs; ii++,value+=stepval) { 689 for (jj=0; jj<bs; jj++) { 690 *bap++ = *value++; 691 } 692 } 693 } 694 noinsert2:; 695 low = i; 696 } 697 ailen[row] = nrow; 698 } 699 PetscFunctionReturn(0); 700 } 701 702 /* 703 This is not yet used 704 */ 705 #undef __FUNCT__ 706 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ_Inode" 707 PetscErrorCode MatAssemblyEnd_SeqSBAIJ_Inode(Mat A) 708 { 709 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 710 PetscErrorCode ierr; 711 const PetscInt *ai = a->i, *aj = a->j,*cols; 712 PetscInt i = 0,j,blk_size,m = A->rmap->n,node_count = 0,nzx,nzy,*ns,row,nz,cnt,cnt2,*counts; 713 PetscTruth flag; 714 715 PetscFunctionBegin; 716 ierr = PetscMalloc(m*sizeof(PetscInt),&ns);CHKERRQ(ierr); 717 while (i < m){ 718 nzx = ai[i+1] - ai[i]; /* Number of nonzeros */ 719 /* Limits the number of elements in a node to 'a->inode.limit' */ 720 for (j=i+1,blk_size=1; j<m && blk_size <a->inode.limit; ++j,++blk_size) { 721 nzy = ai[j+1] - ai[j]; 722 if (nzy != (nzx - j + i)) break; 723 ierr = PetscMemcmp(aj + ai[i] + j - i,aj + ai[j],nzy*sizeof(PetscInt),&flag);CHKERRQ(ierr); 724 if (!flag) break; 725 } 726 ns[node_count++] = blk_size; 727 i = j; 728 } 729 if (!a->inode.size && m && node_count > .9*m) { 730 ierr = PetscFree(ns);CHKERRQ(ierr); 731 ierr = PetscInfo2(A,"Found %D nodes out of %D rows. Not using Inode routines\n",node_count,m);CHKERRQ(ierr); 732 } else { 733 a->inode.node_count = node_count; 734 ierr = PetscMalloc(node_count*sizeof(PetscInt),&a->inode.size);CHKERRQ(ierr); 735 ierr = PetscMemcpy(a->inode.size,ns,node_count*sizeof(PetscInt)); 736 ierr = PetscFree(ns);CHKERRQ(ierr); 737 ierr = PetscInfo3(A,"Found %D nodes of %D. Limit used: %D. Using Inode routines\n",node_count,m,a->inode.limit);CHKERRQ(ierr); 738 739 /* count collections of adjacent columns in each inode */ 740 row = 0; 741 cnt = 0; 742 for (i=0; i<node_count; i++) { 743 cols = aj + ai[row] + a->inode.size[i]; 744 nz = ai[row+1] - ai[row] - a->inode.size[i]; 745 for (j=1; j<nz; j++) { 746 if (cols[j] != cols[j-1]+1) { 747 cnt++; 748 } 749 } 750 cnt++; 751 row += a->inode.size[i]; 752 } 753 ierr = PetscMalloc(2*cnt*sizeof(PetscInt),&counts);CHKERRQ(ierr); 754 cnt = 0; 755 row = 0; 756 for (i=0; i<node_count; i++) { 757 cols = aj + ai[row] + a->inode.size[i]; 758 CHKMEMQ; 759 counts[2*cnt] = cols[0]; 760 CHKMEMQ; 761 nz = ai[row+1] - ai[row] - a->inode.size[i]; 762 cnt2 = 1; 763 for (j=1; j<nz; j++) { 764 if (cols[j] != cols[j-1]+1) { 765 CHKMEMQ; 766 counts[2*(cnt++)+1] = cnt2; 767 counts[2*cnt] = cols[j]; 768 CHKMEMQ; 769 cnt2 = 1; 770 } else cnt2++; 771 } 772 CHKMEMQ; 773 counts[2*(cnt++)+1] = cnt2; 774 CHKMEMQ; 775 row += a->inode.size[i]; 776 } 777 ierr = PetscIntView(2*cnt,counts,0); 778 } 779 780 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatRelax_SeqSBAIJ_1_ushort" 786 PetscErrorCode MatRelax_SeqSBAIJ_1_ushort(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 787 { 788 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 789 const MatScalar *aa=a->a,*v,*v1,*aidiag; 790 PetscScalar *x,*b,*t,sum; 791 MatScalar tmp; 792 PetscErrorCode ierr; 793 PetscInt m=a->mbs,bs=A->rmap->bs,j; 794 const PetscInt *ai=a->i; 795 const unsigned short *aj=a->jshort,*vj,*vj1; 796 PetscInt nz,nz1,i; 797 798 PetscFunctionBegin; 799 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 800 801 its = its*lits; 802 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 803 804 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 805 806 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 807 if (xx != bb) { 808 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 809 } else { 810 b = x; 811 } 812 813 if (!a->idiagvalid) { 814 if (!a->idiag) { 815 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 816 } 817 for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]]; 818 a->idiagvalid = PETSC_TRUE; 819 } 820 821 if (!a->relax_work) { 822 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 823 } 824 t = a->relax_work; 825 826 aidiag = a->idiag; 827 828 if (flag == SOR_APPLY_UPPER) { 829 /* apply (U + D/omega) to the vector */ 830 PetscScalar d; 831 for (i=0; i<m; i++) { 832 d = fshift + aa[ai[i]]; 833 nz = ai[i+1] - ai[i] - 1; 834 vj = aj + ai[i] + 1; 835 v = aa + ai[i] + 1; 836 sum = b[i]*d/omega; 837 PetscSparseDensePlusDot(sum,b,v,vj,nz); 838 x[i] = sum; 839 } 840 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 841 } 842 843 if (flag & SOR_ZERO_INITIAL_GUESS) { 844 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 845 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 846 847 v = aa + 1; 848 vj = aj + 1; 849 for (i=0; i<m; i++){ 850 nz = ai[i+1] - ai[i] - 1; 851 tmp = - (x[i] = omega*t[i]*aidiag[i]); 852 for (j=0; j<nz; j++) { 853 t[vj[j]] += tmp*v[j]; 854 } 855 v += nz + 1; 856 vj += nz + 1; 857 } 858 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 859 } 860 861 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 862 int nz2; 863 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 864 t = b; 865 } 866 867 v = aa + ai[m-1] + 1; 868 vj = aj + ai[m-1] + 1; 869 nz = 0; 870 for (i=m-1; i>=0; i--){ 871 sum = 0.0; 872 nz2 = ai[i] - ai[i-1] - 1; 873 PETSC_Prefetch(v-nz2-1,0,1); 874 PETSC_Prefetch(vj-nz2-1,0,1); 875 PetscSparseDensePlusDot(sum,x,v,vj,nz); 876 sum = t[i] - sum; 877 if (t == b) { 878 x[i] = omega*sum*aidiag[i]; 879 } else { 880 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 881 } 882 nz = nz2; 883 v -= nz + 1; 884 vj -= nz + 1; 885 } 886 t = a->relax_work; 887 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 888 } 889 its--; 890 } 891 892 while (its--) { 893 /* 894 forward sweep: 895 for i=0,...,m-1: 896 sum[i] = (b[i] - U(i,:)x )/d[i]; 897 x[i] = (1-omega)x[i] + omega*sum[i]; 898 b = b - x[i]*U^T(i,:); 899 900 */ 901 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 902 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 903 904 for (i=0; i<m; i++){ 905 v = aa + ai[i] + 1; v1=v; 906 vj = aj + ai[i] + 1; vj1=vj; 907 nz = ai[i+1] - ai[i] - 1; nz1=nz; 908 sum = t[i]; 909 ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr); 910 while (nz1--) sum -= (*v1++)*x[*vj1++]; 911 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 912 while (nz--) t[*vj++] -= x[i]*(*v++); 913 } 914 } 915 916 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 917 /* 918 backward sweep: 919 b = b - x[i]*U^T(i,:), i=0,...,n-2 920 for i=m-1,...,0: 921 sum[i] = (b[i] - U(i,:)x )/d[i]; 922 x[i] = (1-omega)x[i] + omega*sum[i]; 923 */ 924 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 925 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 926 927 for (i=0; i<m-1; i++){ /* update rhs */ 928 v = aa + ai[i] + 1; 929 vj = aj + ai[i] + 1; 930 nz = ai[i+1] - ai[i] - 1; 931 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 932 while (nz--) t[*vj++] -= x[i]*(*v++); 933 } 934 for (i=m-1; i>=0; i--){ 935 v = aa + ai[i] + 1; 936 vj = aj + ai[i] + 1; 937 nz = ai[i+1] - ai[i] - 1; 938 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 939 sum = t[i]; 940 while (nz--) sum -= x[*vj++]*(*v++); 941 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 942 } 943 } 944 } 945 946 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 947 if (bb != xx) { 948 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 949 } 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatMult_SeqSBAIJ_1_ushort" 955 PetscErrorCode MatMult_SeqSBAIJ_1_ushort(Mat A,Vec xx,Vec zz) 956 { 957 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 958 const PetscScalar *x; 959 PetscScalar *z,x1,sum; 960 const MatScalar *v; 961 PetscErrorCode ierr; 962 PetscInt mbs=a->mbs,i,j,nz; 963 const unsigned short *ib=a->jshort; 964 const PetscInt *ai=a->i; 965 966 PetscFunctionBegin; 967 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 968 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 969 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 970 971 v = a->a; 972 for (i=0; i<mbs; i++) { 973 nz = ai[i+1] - ai[i]; /* length of i_th row of A */ 974 x1 = x[i]; 975 sum = v[0]*x1; /* diagonal term */ 976 for (j=1; j<nz; j++) { 977 z[ib[j]] += v[j] * x1; /* (strict lower triangular part of A)*x */ 978 sum += v[j] * x[ib[j]]; /* (strict upper triangular part of A)*x */ 979 } 980 z[i] += sum; 981 v += nz; 982 ib += nz; 983 } 984 985 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 986 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 987 ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 993 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 994 { 995 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 996 PetscErrorCode ierr; 997 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 998 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 999 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1000 MatScalar *aa = a->a,*ap; 1001 1002 PetscFunctionBegin; 1003 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1004 1005 if (m) rmax = ailen[0]; 1006 for (i=1; i<mbs; i++) { 1007 /* move each row back by the amount of empty slots (fshift) before it*/ 1008 fshift += imax[i-1] - ailen[i-1]; 1009 rmax = PetscMax(rmax,ailen[i]); 1010 if (fshift) { 1011 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1012 N = ailen[i]; 1013 for (j=0; j<N; j++) { 1014 ip[j-fshift] = ip[j]; 1015 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1016 } 1017 } 1018 ai[i] = ai[i-1] + ailen[i-1]; 1019 } 1020 if (mbs) { 1021 fshift += imax[mbs-1] - ailen[mbs-1]; 1022 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1023 } 1024 /* reset ilen and imax for each row */ 1025 for (i=0; i<mbs; i++) { 1026 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1027 } 1028 a->nz = ai[mbs]; 1029 1030 /* diagonals may have moved, reset it */ 1031 if (a->diag) { 1032 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1033 } 1034 if (fshift && a->nounused == -1) { 1035 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1036 } 1037 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap->N,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1038 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1039 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1040 a->reallocs = 0; 1041 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1042 a->idiagvalid = PETSC_FALSE; 1043 1044 if (A->cmap->n < 65536 && A->cmap->bs == 1) { 1045 if (!a->jshort) { 1046 ierr = PetscMalloc(a->i[A->rmap->n]*sizeof(unsigned short),&a->jshort);CHKERRQ(ierr); 1047 for (i=0; i<a->i[A->rmap->n]; i++) a->jshort[i] = a->j[i]; 1048 A->ops->mult = MatMult_SeqSBAIJ_1_ushort; 1049 A->ops->relax = MatRelax_SeqSBAIJ_1_ushort; 1050 } 1051 } 1052 PetscFunctionReturn(0); 1053 } 1054 1055 /* 1056 This function returns an array of flags which indicate the locations of contiguous 1057 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1058 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1059 Assume: sizes should be long enough to hold all the values. 1060 */ 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 1063 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1064 { 1065 PetscInt i,j,k,row; 1066 PetscTruth flg; 1067 1068 PetscFunctionBegin; 1069 for (i=0,j=0; i<n; j++) { 1070 row = idx[i]; 1071 if (row%bs!=0) { /* Not the begining of a block */ 1072 sizes[j] = 1; 1073 i++; 1074 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 1075 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1076 i++; 1077 } else { /* Begining of the block, so check if the complete block exists */ 1078 flg = PETSC_TRUE; 1079 for (k=1; k<bs; k++) { 1080 if (row+k != idx[i+k]) { /* break in the block */ 1081 flg = PETSC_FALSE; 1082 break; 1083 } 1084 } 1085 if (flg) { /* No break in the bs */ 1086 sizes[j] = bs; 1087 i+= bs; 1088 } else { 1089 sizes[j] = 1; 1090 i++; 1091 } 1092 } 1093 } 1094 *bs_max = j; 1095 PetscFunctionReturn(0); 1096 } 1097 1098 1099 /* Only add/insert a(i,j) with i<=j (blocks). 1100 Any a(i,j) with i>j input by user is ingored. 1101 */ 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 1105 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1106 { 1107 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1110 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 1111 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 1112 PetscInt ridx,cidx,bs2=a->bs2; 1113 MatScalar *ap,value,*aa=a->a,*bap; 1114 1115 PetscFunctionBegin; 1116 for (k=0; k<m; k++) { /* loop over added rows */ 1117 row = im[k]; /* row number */ 1118 brow = row/bs; /* block row number */ 1119 if (row < 0) continue; 1120 #if defined(PETSC_USE_DEBUG) 1121 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 1122 #endif 1123 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 1124 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 1125 rmax = imax[brow]; /* maximum space allocated for this row */ 1126 nrow = ailen[brow]; /* actual length of this row */ 1127 low = 0; 1128 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 if (in[l] < 0) continue; 1131 #if defined(PETSC_USE_DEBUG) 1132 if (in[l] >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap->N-1); 1133 #endif 1134 col = in[l]; 1135 bcol = col/bs; /* block col number */ 1136 1137 if (brow > bcol) { 1138 if (a->ignore_ltriangular){ 1139 continue; /* ignore lower triangular values */ 1140 } else { 1141 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 1142 } 1143 } 1144 1145 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 1146 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 1147 /* element value a(k,l) */ 1148 if (roworiented) { 1149 value = v[l + k*n]; 1150 } else { 1151 value = v[k + l*m]; 1152 } 1153 1154 /* move pointer bap to a(k,l) quickly and add/insert value */ 1155 if (col <= lastcol) low = 0; high = nrow; 1156 lastcol = col; 1157 while (high-low > 7) { 1158 t = (low+high)/2; 1159 if (rp[t] > bcol) high = t; 1160 else low = t; 1161 } 1162 for (i=low; i<high; i++) { 1163 if (rp[i] > bcol) break; 1164 if (rp[i] == bcol) { 1165 bap = ap + bs2*i + bs*cidx + ridx; 1166 if (is == ADD_VALUES) *bap += value; 1167 else *bap = value; 1168 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 1169 if (brow == bcol && ridx < cidx){ 1170 bap = ap + bs2*i + bs*ridx + cidx; 1171 if (is == ADD_VALUES) *bap += value; 1172 else *bap = value; 1173 } 1174 goto noinsert1; 1175 } 1176 } 1177 1178 if (nonew == 1) goto noinsert1; 1179 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1180 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1181 1182 N = nrow++ - 1; high++; 1183 /* shift up all the later entries in this row */ 1184 for (ii=N; ii>=i; ii--) { 1185 rp[ii+1] = rp[ii]; 1186 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1187 } 1188 if (N>=i) { 1189 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1190 } 1191 rp[i] = bcol; 1192 ap[bs2*i + bs*cidx + ridx] = value; 1193 noinsert1:; 1194 low = i; 1195 } 1196 } /* end of loop over added columns */ 1197 ailen[brow] = nrow; 1198 } /* end of loop over added rows */ 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1204 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,const MatFactorInfo *info) 1205 { 1206 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1207 Mat outA; 1208 PetscErrorCode ierr; 1209 PetscTruth row_identity; 1210 1211 PetscFunctionBegin; 1212 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 1213 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1214 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 1215 if (inA->rmap->bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap->bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 1216 1217 outA = inA; 1218 inA->factor = MAT_FACTOR_ICC; 1219 1220 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1221 ierr = MatSeqSBAIJSetNumericFactorization(inA,row_identity);CHKERRQ(ierr); 1222 1223 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1224 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1225 a->row = row; 1226 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1227 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1228 a->col = row; 1229 1230 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 1231 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 1232 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1233 1234 if (!a->solve_work) { 1235 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1236 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1237 } 1238 1239 ierr = MatCholeskyFactorNumeric(outA,inA,info);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 EXTERN_C_BEGIN 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1246 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1247 { 1248 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1249 PetscInt i,nz,n; 1250 1251 PetscFunctionBegin; 1252 nz = baij->maxnz; 1253 n = mat->cmap->n; 1254 for (i=0; i<nz; i++) { 1255 baij->j[i] = indices[i]; 1256 } 1257 baij->nz = nz; 1258 for (i=0; i<n; i++) { 1259 baij->ilen[i] = baij->imax[i]; 1260 } 1261 PetscFunctionReturn(0); 1262 } 1263 EXTERN_C_END 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1267 /*@ 1268 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1269 in the matrix. 1270 1271 Input Parameters: 1272 + mat - the SeqSBAIJ matrix 1273 - indices - the column indices 1274 1275 Level: advanced 1276 1277 Notes: 1278 This can be called if you have precomputed the nonzero structure of the 1279 matrix and want to provide it to the matrix object to improve the performance 1280 of the MatSetValues() operation. 1281 1282 You MUST have set the correct numbers of nonzeros per row in the call to 1283 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1284 1285 MUST be called before any calls to MatSetValues() 1286 1287 .seealso: MatCreateSeqSBAIJ 1288 @*/ 1289 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1290 { 1291 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1292 1293 PetscFunctionBegin; 1294 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1295 PetscValidPointer(indices,2); 1296 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1297 if (f) { 1298 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1299 } else { 1300 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1301 } 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1307 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1308 { 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 /* If the two matrices have the same copy implementation, use fast copy. */ 1313 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1314 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1315 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1316 1317 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 1318 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1319 } 1320 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 1321 } else { 1322 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1323 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1324 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1331 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1332 { 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1342 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1343 { 1344 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1345 PetscFunctionBegin; 1346 *array = a->a; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1352 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1353 { 1354 PetscFunctionBegin; 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #include "petscblaslapack.h" 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1361 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1362 { 1363 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1364 PetscErrorCode ierr; 1365 PetscInt i,bs=Y->rmap->bs,bs2,j; 1366 PetscBLASInt one = 1,bnz = PetscBLASIntCast(x->nz); 1367 1368 PetscFunctionBegin; 1369 if (str == SAME_NONZERO_PATTERN) { 1370 PetscScalar alpha = a; 1371 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1372 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1373 if (y->xtoy && y->XtoY != X) { 1374 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1375 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1376 } 1377 if (!y->xtoy) { /* get xtoy */ 1378 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1379 y->XtoY = X; 1380 } 1381 bs2 = bs*bs; 1382 for (i=0; i<x->nz; i++) { 1383 j = 0; 1384 while (j < bs2){ 1385 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1386 j++; 1387 } 1388 } 1389 ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1390 } else { 1391 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1392 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1393 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1400 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1401 { 1402 PetscFunctionBegin; 1403 *flg = PETSC_TRUE; 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1409 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1410 { 1411 PetscFunctionBegin; 1412 *flg = PETSC_TRUE; 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1418 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1419 { 1420 PetscFunctionBegin; 1421 *flg = PETSC_FALSE; 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1427 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1428 { 1429 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1430 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1431 MatScalar *aa = a->a; 1432 1433 PetscFunctionBegin; 1434 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1440 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1441 { 1442 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1443 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1444 MatScalar *aa = a->a; 1445 1446 PetscFunctionBegin; 1447 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 /* -------------------------------------------------------------------*/ 1452 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1453 MatGetRow_SeqSBAIJ, 1454 MatRestoreRow_SeqSBAIJ, 1455 MatMult_SeqSBAIJ_N, 1456 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1457 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1458 MatMultAdd_SeqSBAIJ_N, 1459 0, 1460 0, 1461 0, 1462 /*10*/ 0, 1463 0, 1464 MatCholeskyFactor_SeqSBAIJ, 1465 MatRelax_SeqSBAIJ, 1466 MatTranspose_SeqSBAIJ, 1467 /*15*/ MatGetInfo_SeqSBAIJ, 1468 MatEqual_SeqSBAIJ, 1469 MatGetDiagonal_SeqSBAIJ, 1470 MatDiagonalScale_SeqSBAIJ, 1471 MatNorm_SeqSBAIJ, 1472 /*20*/ 0, 1473 MatAssemblyEnd_SeqSBAIJ, 1474 MatSetOption_SeqSBAIJ, 1475 MatZeroEntries_SeqSBAIJ, 1476 /*24*/ 0, 1477 0, 1478 0, 1479 0, 1480 0, 1481 /*29*/ MatSetUpPreallocation_SeqSBAIJ, 1482 0, 1483 0, 1484 MatGetArray_SeqSBAIJ, 1485 MatRestoreArray_SeqSBAIJ, 1486 /*34*/ MatDuplicate_SeqSBAIJ, 1487 0, 1488 0, 1489 0, 1490 MatICCFactor_SeqSBAIJ, 1491 /*39*/ MatAXPY_SeqSBAIJ, 1492 MatGetSubMatrices_SeqSBAIJ, 1493 MatIncreaseOverlap_SeqSBAIJ, 1494 MatGetValues_SeqSBAIJ, 1495 MatCopy_SeqSBAIJ, 1496 /*44*/ 0, 1497 MatScale_SeqSBAIJ, 1498 0, 1499 0, 1500 0, 1501 /*49*/ 0, 1502 MatGetRowIJ_SeqSBAIJ, 1503 MatRestoreRowIJ_SeqSBAIJ, 1504 0, 1505 0, 1506 /*54*/ 0, 1507 0, 1508 0, 1509 0, 1510 MatSetValuesBlocked_SeqSBAIJ, 1511 /*59*/ MatGetSubMatrix_SeqSBAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*64*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*69*/ MatGetRowMaxAbs_SeqSBAIJ, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /*74*/ 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 /*79*/ 0, 1532 0, 1533 0, 1534 #if !defined(PETSC_USE_COMPLEX) 1535 MatGetInertia_SeqSBAIJ, 1536 #else 1537 0, 1538 #endif 1539 MatLoad_SeqSBAIJ, 1540 /*84*/ MatIsSymmetric_SeqSBAIJ, 1541 MatIsHermitian_SeqSBAIJ, 1542 MatIsStructurallySymmetric_SeqSBAIJ, 1543 0, 1544 0, 1545 /*89*/ 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*94*/ 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*99*/ 0, 1556 0, 1557 0, 1558 0, 1559 0, 1560 /*104*/0, 1561 MatRealPart_SeqSBAIJ, 1562 MatImaginaryPart_SeqSBAIJ, 1563 MatGetRowUpperTriangular_SeqSBAIJ, 1564 MatRestoreRowUpperTriangular_SeqSBAIJ, 1565 /*109*/0, 1566 0, 1567 0, 1568 0, 1569 MatMissingDiagonal_SeqSBAIJ 1570 }; 1571 1572 EXTERN_C_BEGIN 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1575 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1576 { 1577 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1578 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 if (aij->nonew != 1) { 1583 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1584 } 1585 1586 /* allocate space for values if not already there */ 1587 if (!aij->saved_values) { 1588 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1589 } 1590 1591 /* copy values over */ 1592 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 EXTERN_C_END 1596 1597 EXTERN_C_BEGIN 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1600 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1601 { 1602 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1603 PetscErrorCode ierr; 1604 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 1605 1606 PetscFunctionBegin; 1607 if (aij->nonew != 1) { 1608 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1609 } 1610 if (!aij->saved_values) { 1611 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1612 } 1613 1614 /* copy values over */ 1615 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 EXTERN_C_END 1619 1620 EXTERN_C_BEGIN 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1623 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1624 { 1625 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1626 PetscErrorCode ierr; 1627 PetscInt i,mbs,bs2, newbs = PetscAbs(bs); 1628 PetscTruth skipallocation = PETSC_FALSE,flg = PETSC_FALSE; 1629 1630 PetscFunctionBegin; 1631 B->preallocated = PETSC_TRUE; 1632 if (bs < 0) { 1633 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1634 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqSBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1635 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1636 bs = PetscAbs(bs); 1637 } 1638 if (nnz && newbs != bs) { 1639 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 1640 } 1641 bs = newbs; 1642 1643 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1644 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1645 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 1646 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 1647 1648 mbs = B->rmap->N/bs; 1649 bs2 = bs*bs; 1650 1651 if (mbs*bs != B->rmap->N) { 1652 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1653 } 1654 1655 if (nz == MAT_SKIP_ALLOCATION) { 1656 skipallocation = PETSC_TRUE; 1657 nz = 0; 1658 } 1659 1660 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1661 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1662 if (nnz) { 1663 for (i=0; i<mbs; i++) { 1664 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1665 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1666 } 1667 } 1668 1669 B->ops->mult = MatMult_SeqSBAIJ_N; 1670 B->ops->multadd = MatMultAdd_SeqSBAIJ_N; 1671 B->ops->multtranspose = MatMult_SeqSBAIJ_N; 1672 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_N; 1673 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr); 1674 if (!flg) { 1675 switch (bs) { 1676 case 1: 1677 B->ops->mult = MatMult_SeqSBAIJ_1; 1678 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1679 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1680 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1681 break; 1682 case 2: 1683 B->ops->mult = MatMult_SeqSBAIJ_2; 1684 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1685 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1686 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1687 break; 1688 case 3: 1689 B->ops->mult = MatMult_SeqSBAIJ_3; 1690 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1691 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1692 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1693 break; 1694 case 4: 1695 B->ops->mult = MatMult_SeqSBAIJ_4; 1696 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1697 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1698 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1699 break; 1700 case 5: 1701 B->ops->mult = MatMult_SeqSBAIJ_5; 1702 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1703 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1704 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1705 break; 1706 case 6: 1707 B->ops->mult = MatMult_SeqSBAIJ_6; 1708 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1709 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1710 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1711 break; 1712 case 7: 1713 B->ops->mult = MatMult_SeqSBAIJ_7; 1714 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1715 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1716 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1717 break; 1718 } 1719 } 1720 1721 b->mbs = mbs; 1722 b->nbs = mbs; 1723 if (!skipallocation) { 1724 if (!b->imax) { 1725 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1726 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 1727 } 1728 if (!nnz) { 1729 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1730 else if (nz <= 0) nz = 1; 1731 for (i=0; i<mbs; i++) { 1732 b->imax[i] = nz; 1733 } 1734 nz = nz*mbs; /* total nz */ 1735 } else { 1736 nz = 0; 1737 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1738 } 1739 /* b->ilen will count nonzeros in each block row so far. */ 1740 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1741 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1742 1743 /* allocate the matrix space */ 1744 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 1745 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 1746 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1747 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1748 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1749 b->singlemalloc = PETSC_TRUE; 1750 1751 /* pointer to beginning of each row */ 1752 b->i[0] = 0; 1753 for (i=1; i<mbs+1; i++) { 1754 b->i[i] = b->i[i-1] + b->imax[i-1]; 1755 } 1756 b->free_a = PETSC_TRUE; 1757 b->free_ij = PETSC_TRUE; 1758 } else { 1759 b->free_a = PETSC_FALSE; 1760 b->free_ij = PETSC_FALSE; 1761 } 1762 1763 B->rmap->bs = bs; 1764 b->bs2 = bs2; 1765 b->nz = 0; 1766 b->maxnz = nz*bs2; 1767 1768 b->inew = 0; 1769 b->jnew = 0; 1770 b->anew = 0; 1771 b->a2anew = 0; 1772 b->permute = PETSC_FALSE; 1773 PetscFunctionReturn(0); 1774 } 1775 EXTERN_C_END 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 1779 /* 1780 This is used to set the numeric factorization for both Cholesky and ICC symbolic factorization 1781 */ 1782 PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat B,PetscTruth natural) 1783 { 1784 PetscErrorCode ierr; 1785 PetscTruth flg = PETSC_FALSE; 1786 PetscInt bs = B->rmap->bs; 1787 1788 PetscFunctionBegin; 1789 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr); 1790 if (flg) bs = 8; 1791 1792 if (!natural) { 1793 switch (bs) { 1794 case 1: 1795 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1796 break; 1797 case 2: 1798 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1799 break; 1800 case 3: 1801 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1802 break; 1803 case 4: 1804 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1805 break; 1806 case 5: 1807 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1808 break; 1809 case 6: 1810 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1811 break; 1812 case 7: 1813 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1814 break; 1815 default: 1816 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N; 1817 break; 1818 } 1819 } else { 1820 switch (bs) { 1821 case 1: 1822 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1823 break; 1824 case 2: 1825 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1826 break; 1827 case 3: 1828 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1829 break; 1830 case 4: 1831 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1832 break; 1833 case 5: 1834 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1835 break; 1836 case 6: 1837 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1838 break; 1839 case 7: 1840 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1841 break; 1842 default: 1843 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1844 break; 1845 } 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 1850 EXTERN_C_BEGIN 1851 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1852 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1853 EXTERN_C_END 1854 1855 1856 EXTERN_C_BEGIN 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatGetFactor_seqsbaij_petsc" 1859 PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 1860 { 1861 PetscInt n = A->rmap->n; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 1866 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 1867 if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1868 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 1869 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1870 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; 1871 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; 1872 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 1873 (*B)->factor = ftype; 1874 PetscFunctionReturn(0); 1875 } 1876 EXTERN_C_END 1877 1878 EXTERN_C_BEGIN 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatGetFactorAvailable_seqsbaij_petsc" 1881 PetscErrorCode MatGetFactorAvailable_seqsbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 1882 { 1883 PetscFunctionBegin; 1884 if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1885 *flg = PETSC_TRUE; 1886 } else { 1887 *flg = PETSC_FALSE; 1888 } 1889 PetscFunctionReturn(0); 1890 } 1891 EXTERN_C_END 1892 1893 EXTERN_C_BEGIN 1894 #if defined(PETSC_HAVE_MUMPS) 1895 extern PetscErrorCode MatGetFactor_seqsbaij_mumps(Mat,MatFactorType,Mat*); 1896 #endif 1897 #if defined(PETSC_HAVE_SPOOLES) 1898 extern PetscErrorCode MatGetFactor_seqsbaij_spooles(Mat,MatFactorType,Mat*); 1899 #endif 1900 #if defined(PETSC_HAVE_PASTIX) 1901 extern PetscErrorCode MatGetFactor_seqsbaij_pastix(Mat,MatFactorType,Mat*); 1902 #endif 1903 EXTERN_C_END 1904 1905 /*MC 1906 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1907 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1908 1909 Options Database Keys: 1910 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1911 1912 Level: beginner 1913 1914 .seealso: MatCreateSeqSBAIJ 1915 M*/ 1916 1917 EXTERN_C_BEGIN 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1920 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1921 { 1922 Mat_SeqSBAIJ *b; 1923 PetscErrorCode ierr; 1924 PetscMPIInt size; 1925 PetscTruth no_unroll = PETSC_FALSE,no_inode = PETSC_FALSE; 1926 1927 PetscFunctionBegin; 1928 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 1929 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1930 1931 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1932 B->data = (void*)b; 1933 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1934 B->ops->destroy = MatDestroy_SeqSBAIJ; 1935 B->ops->view = MatView_SeqSBAIJ; 1936 B->mapping = 0; 1937 b->row = 0; 1938 b->icol = 0; 1939 b->reallocs = 0; 1940 b->saved_values = 0; 1941 b->inode.limit = 5; 1942 b->inode.max_limit = 5; 1943 1944 b->roworiented = PETSC_TRUE; 1945 b->nonew = 0; 1946 b->diag = 0; 1947 b->solve_work = 0; 1948 b->mult_work = 0; 1949 B->spptr = 0; 1950 b->keepnonzeropattern = PETSC_FALSE; 1951 b->xtoy = 0; 1952 b->XtoY = 0; 1953 1954 b->inew = 0; 1955 b->jnew = 0; 1956 b->anew = 0; 1957 b->a2anew = 0; 1958 b->permute = PETSC_FALSE; 1959 1960 b->ignore_ltriangular = PETSC_FALSE; 1961 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_ignore_lower_triangular",&b->ignore_ltriangular,PETSC_NULL);CHKERRQ(ierr); 1962 1963 b->getrow_utriangular = PETSC_FALSE; 1964 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_getrow_uppertriangular",&b->getrow_utriangular,PETSC_NULL);CHKERRQ(ierr); 1965 1966 #if defined(PETSC_HAVE_PASTIX) 1967 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_pastix_C", 1968 "MatGetFactor_seqsbaij_pastix", 1969 MatGetFactor_seqsbaij_pastix);CHKERRQ(ierr); 1970 #endif 1971 #if defined(PETSC_HAVE_SPOOLES) 1972 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C", 1973 "MatGetFactor_seqsbaij_spooles", 1974 MatGetFactor_seqsbaij_spooles);CHKERRQ(ierr); 1975 #endif 1976 #if defined(PETSC_HAVE_MUMPS) 1977 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C", 1978 "MatGetFactor_seqsbaij_mumps", 1979 MatGetFactor_seqsbaij_mumps);CHKERRQ(ierr); 1980 #endif 1981 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqsbaij_petsc_C", 1982 "MatGetFactorAvailable_seqsbaij_petsc", 1983 MatGetFactorAvailable_seqsbaij_petsc);CHKERRQ(ierr); 1984 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C", 1985 "MatGetFactor_seqsbaij_petsc", 1986 MatGetFactor_seqsbaij_petsc);CHKERRQ(ierr); 1987 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1988 "MatStoreValues_SeqSBAIJ", 1989 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1990 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1991 "MatRetrieveValues_SeqSBAIJ", 1992 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1993 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1994 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1995 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1996 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1997 "MatConvert_SeqSBAIJ_SeqAIJ", 1998 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1999 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 2000 "MatConvert_SeqSBAIJ_SeqBAIJ", 2001 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 2002 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 2003 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 2004 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 2005 2006 B->symmetric = PETSC_TRUE; 2007 B->structurally_symmetric = PETSC_TRUE; 2008 B->symmetric_set = PETSC_TRUE; 2009 B->structurally_symmetric_set = PETSC_TRUE; 2010 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 2011 2012 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for SEQSBAIJ matrix","Mat");CHKERRQ(ierr); 2013 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for inodes (slower)",PETSC_NULL,no_unroll,&no_unroll,PETSC_NULL);CHKERRQ(ierr); 2014 if (no_unroll) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_unroll\n");CHKERRQ(ierr);} 2015 ierr = PetscOptionsTruth("-mat_no_inode","Do not optimize for inodes (slower)",PETSC_NULL,no_inode,&no_inode,PETSC_NULL);CHKERRQ(ierr); 2016 if (no_inode) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_inode\n");CHKERRQ(ierr);} 2017 ierr = PetscOptionsInt("-mat_inode_limit","Do not use inodes larger then this value",PETSC_NULL,b->inode.limit,&b->inode.limit,PETSC_NULL);CHKERRQ(ierr); 2018 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2019 b->inode.use = (PetscTruth)(!(no_unroll || no_inode)); 2020 if (b->inode.limit > b->inode.max_limit) b->inode.limit = b->inode.max_limit; 2021 2022 PetscFunctionReturn(0); 2023 } 2024 EXTERN_C_END 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 2028 /*@C 2029 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 2030 compressed row) format. For good matrix assembly performance the 2031 user should preallocate the matrix storage by setting the parameter nz 2032 (or the array nnz). By setting these parameters accurately, performance 2033 during matrix assembly can be increased by more than a factor of 50. 2034 2035 Collective on Mat 2036 2037 Input Parameters: 2038 + A - the symmetric matrix 2039 . bs - size of block 2040 . nz - number of block nonzeros per block row (same for all rows) 2041 - nnz - array containing the number of block nonzeros in the upper triangular plus 2042 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 2043 2044 Options Database Keys: 2045 . -mat_no_unroll - uses code that does not unroll the loops in the 2046 block calculations (much slower) 2047 . -mat_block_size - size of the blocks to use (only works if a negative bs is passed in 2048 2049 Level: intermediate 2050 2051 Notes: 2052 Specify the preallocated storage with either nz or nnz (not both). 2053 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2054 allocation. For additional details, see the users manual chapter on 2055 matrices. 2056 2057 You can call MatGetInfo() to get information on how effective the preallocation was; 2058 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2059 You can also run with the option -info and look for messages with the string 2060 malloc in them to see if additional memory allocation was needed. 2061 2062 If the nnz parameter is given then the nz parameter is ignored 2063 2064 2065 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2066 @*/ 2067 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2068 { 2069 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2070 2071 PetscFunctionBegin; 2072 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2073 if (f) { 2074 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatCreateSeqSBAIJ" 2081 /*@C 2082 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 2083 compressed row) format. For good matrix assembly performance the 2084 user should preallocate the matrix storage by setting the parameter nz 2085 (or the array nnz). By setting these parameters accurately, performance 2086 during matrix assembly can be increased by more than a factor of 50. 2087 2088 Collective on MPI_Comm 2089 2090 Input Parameters: 2091 + comm - MPI communicator, set to PETSC_COMM_SELF 2092 . bs - size of block 2093 . m - number of rows, or number of columns 2094 . nz - number of block nonzeros per block row (same for all rows) 2095 - nnz - array containing the number of block nonzeros in the upper triangular plus 2096 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 2097 2098 Output Parameter: 2099 . A - the symmetric matrix 2100 2101 Options Database Keys: 2102 . -mat_no_unroll - uses code that does not unroll the loops in the 2103 block calculations (much slower) 2104 . -mat_block_size - size of the blocks to use 2105 2106 Level: intermediate 2107 2108 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2109 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2110 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2111 2112 Notes: 2113 The number of rows and columns must be divisible by blocksize. 2114 This matrix type does not support complex Hermitian operation. 2115 2116 Specify the preallocated storage with either nz or nnz (not both). 2117 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2118 allocation. For additional details, see the users manual chapter on 2119 matrices. 2120 2121 If the nnz parameter is given then the nz parameter is ignored 2122 2123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2124 @*/ 2125 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2126 { 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2131 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2132 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2133 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 2139 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2140 { 2141 Mat C; 2142 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2145 2146 PetscFunctionBegin; 2147 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2148 2149 *B = 0; 2150 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2151 ierr = MatSetSizes(C,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 2152 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2153 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2154 c = (Mat_SeqSBAIJ*)C->data; 2155 2156 C->preallocated = PETSC_TRUE; 2157 C->factor = A->factor; 2158 c->row = 0; 2159 c->icol = 0; 2160 c->saved_values = 0; 2161 c->keepnonzeropattern = a->keepnonzeropattern; 2162 C->assembled = PETSC_TRUE; 2163 2164 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2165 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2166 c->bs2 = a->bs2; 2167 c->mbs = a->mbs; 2168 c->nbs = a->nbs; 2169 2170 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 2171 for (i=0; i<mbs; i++) { 2172 c->imax[i] = a->imax[i]; 2173 c->ilen[i] = a->ilen[i]; 2174 } 2175 2176 /* allocate the matrix space */ 2177 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2178 c->singlemalloc = PETSC_TRUE; 2179 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2180 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 2181 if (mbs > 0) { 2182 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2183 if (cpvalues == MAT_COPY_VALUES) { 2184 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2185 } else { 2186 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2187 } 2188 if (a->jshort) { 2189 ierr = PetscMalloc(nz*sizeof(unsigned short),&c->jshort);CHKERRQ(ierr); 2190 ierr = PetscMemcpy(c->jshort,a->jshort,nz*sizeof(unsigned short));CHKERRQ(ierr); 2191 } 2192 } 2193 2194 c->roworiented = a->roworiented; 2195 c->nonew = a->nonew; 2196 2197 if (a->diag) { 2198 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2199 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2200 for (i=0; i<mbs; i++) { 2201 c->diag[i] = a->diag[i]; 2202 } 2203 } else c->diag = 0; 2204 c->nz = a->nz; 2205 c->maxnz = a->maxnz; 2206 c->solve_work = 0; 2207 c->mult_work = 0; 2208 c->free_a = PETSC_TRUE; 2209 c->free_ij = PETSC_TRUE; 2210 *B = C; 2211 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "MatLoad_SeqSBAIJ" 2217 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2218 { 2219 Mat_SeqSBAIJ *a; 2220 Mat B; 2221 PetscErrorCode ierr; 2222 int fd; 2223 PetscMPIInt size; 2224 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2225 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 2226 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2227 PetscInt *masked,nmask,tmp,bs2,ishift; 2228 PetscScalar *aa; 2229 MPI_Comm comm = ((PetscObject)viewer)->comm; 2230 2231 PetscFunctionBegin; 2232 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2233 bs2 = bs*bs; 2234 2235 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2236 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2237 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2238 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2239 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2240 M = header[1]; N = header[2]; nz = header[3]; 2241 2242 if (header[3] < 0) { 2243 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 2244 } 2245 2246 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2247 2248 /* 2249 This code adds extra rows to make sure the number of rows is 2250 divisible by the blocksize 2251 */ 2252 mbs = M/bs; 2253 extra_rows = bs - M + bs*(mbs); 2254 if (extra_rows == bs) extra_rows = 0; 2255 else mbs++; 2256 if (extra_rows) { 2257 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2258 } 2259 2260 /* read in row lengths */ 2261 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2262 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2263 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2264 2265 /* read in column indices */ 2266 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2267 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2268 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2269 2270 /* loop over row lengths determining block row lengths */ 2271 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 2272 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2273 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2274 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2275 masked = mask + mbs; 2276 rowcount = 0; nzcount = 0; 2277 for (i=0; i<mbs; i++) { 2278 nmask = 0; 2279 for (j=0; j<bs; j++) { 2280 kmax = rowlengths[rowcount]; 2281 for (k=0; k<kmax; k++) { 2282 tmp = jj[nzcount++]/bs; /* block col. index */ 2283 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 2284 } 2285 rowcount++; 2286 } 2287 s_browlengths[i] += nmask; 2288 2289 /* zero out the mask elements we set */ 2290 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2291 } 2292 2293 /* create our matrix */ 2294 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 2295 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2296 ierr = MatSetType(B,type);CHKERRQ(ierr); 2297 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 2298 a = (Mat_SeqSBAIJ*)B->data; 2299 2300 /* set matrix "i" values */ 2301 a->i[0] = 0; 2302 for (i=1; i<= mbs; i++) { 2303 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 2304 a->ilen[i-1] = s_browlengths[i-1]; 2305 } 2306 a->nz = a->i[mbs]; 2307 2308 /* read in nonzero values */ 2309 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2310 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2311 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2312 2313 /* set "a" and "j" values into matrix */ 2314 nzcount = 0; jcount = 0; 2315 for (i=0; i<mbs; i++) { 2316 nzcountb = nzcount; 2317 nmask = 0; 2318 for (j=0; j<bs; j++) { 2319 kmax = rowlengths[i*bs+j]; 2320 for (k=0; k<kmax; k++) { 2321 tmp = jj[nzcount++]/bs; /* block col. index */ 2322 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 2323 } 2324 } 2325 /* sort the masked values */ 2326 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2327 2328 /* set "j" values into matrix */ 2329 maskcount = 1; 2330 for (j=0; j<nmask; j++) { 2331 a->j[jcount++] = masked[j]; 2332 mask[masked[j]] = maskcount++; 2333 } 2334 2335 /* set "a" values into matrix */ 2336 ishift = bs2*a->i[i]; 2337 for (j=0; j<bs; j++) { 2338 kmax = rowlengths[i*bs+j]; 2339 for (k=0; k<kmax; k++) { 2340 tmp = jj[nzcountb]/bs ; /* block col. index */ 2341 if (tmp >= i){ 2342 block = mask[tmp] - 1; 2343 point = jj[nzcountb] - bs*tmp; 2344 idx = ishift + bs2*block + j + bs*point; 2345 a->a[idx] = aa[nzcountb]; 2346 } 2347 nzcountb++; 2348 } 2349 } 2350 /* zero out the mask elements we set */ 2351 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2352 } 2353 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2354 2355 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2356 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 2357 ierr = PetscFree(aa);CHKERRQ(ierr); 2358 ierr = PetscFree(jj);CHKERRQ(ierr); 2359 ierr = PetscFree(mask);CHKERRQ(ierr); 2360 2361 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2362 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2363 ierr = MatView_Private(B);CHKERRQ(ierr); 2364 *A = B; 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatRelax_SeqSBAIJ" 2370 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2371 { 2372 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2373 const MatScalar *aa=a->a,*v,*v1,*aidiag; 2374 PetscScalar *x,*b,*t,sum; 2375 MatScalar tmp; 2376 PetscErrorCode ierr; 2377 PetscInt m=a->mbs,bs=A->rmap->bs,j; 2378 const PetscInt *ai=a->i,*aj=a->j,*vj,*vj1; 2379 PetscInt nz,nz1,i; 2380 2381 PetscFunctionBegin; 2382 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 2383 2384 its = its*lits; 2385 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2386 2387 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2388 2389 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2390 if (xx != bb) { 2391 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2392 } else { 2393 b = x; 2394 } 2395 2396 if (!a->idiagvalid) { 2397 if (!a->idiag) { 2398 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 2399 } 2400 for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]]; 2401 a->idiagvalid = PETSC_TRUE; 2402 } 2403 2404 if (!a->relax_work) { 2405 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 2406 } 2407 t = a->relax_work; 2408 2409 aidiag = a->idiag; 2410 2411 if (flag == SOR_APPLY_UPPER) { 2412 /* apply (U + D/omega) to the vector */ 2413 PetscScalar d; 2414 for (i=0; i<m; i++) { 2415 d = fshift + aa[ai[i]]; 2416 nz = ai[i+1] - ai[i] - 1; 2417 vj = aj + ai[i] + 1; 2418 v = aa + ai[i] + 1; 2419 sum = b[i]*d/omega; 2420 PetscSparseDensePlusDot(sum,b,v,vj,nz); 2421 x[i] = sum; 2422 } 2423 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 2424 } 2425 2426 if (flag & SOR_ZERO_INITIAL_GUESS) { 2427 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2428 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2429 2430 v = aa + 1; 2431 vj = aj + 1; 2432 for (i=0; i<m; i++){ 2433 nz = ai[i+1] - ai[i] - 1; 2434 tmp = - (x[i] = omega*t[i]*aidiag[i]); 2435 for (j=0; j<nz; j++) { 2436 t[vj[j]] += tmp*v[j]; 2437 } 2438 v += nz + 1; 2439 vj += nz + 1; 2440 } 2441 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 2442 } 2443 2444 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2445 int nz2; 2446 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2447 t = b; 2448 } 2449 2450 v = aa + ai[m-1] + 1; 2451 vj = aj + ai[m-1] + 1; 2452 nz = 0; 2453 for (i=m-1; i>=0; i--){ 2454 sum = 0.0; 2455 nz2 = ai[i] - ai[i-1] - 1; 2456 PETSC_Prefetch(v-nz2-1,0,1); 2457 PETSC_Prefetch(vj-nz2-1,0,1); 2458 PetscSparseDensePlusDot(sum,x,v,vj,nz); 2459 sum = t[i] - sum; 2460 if (t == b) { 2461 x[i] = omega*sum*aidiag[i]; 2462 } else { 2463 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 2464 } 2465 nz = nz2; 2466 v -= nz + 1; 2467 vj -= nz + 1; 2468 } 2469 t = a->relax_work; 2470 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 2471 } 2472 its--; 2473 } 2474 2475 while (its--) { 2476 /* 2477 forward sweep: 2478 for i=0,...,m-1: 2479 sum[i] = (b[i] - U(i,:)x )/d[i]; 2480 x[i] = (1-omega)x[i] + omega*sum[i]; 2481 b = b - x[i]*U^T(i,:); 2482 2483 */ 2484 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2485 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2486 2487 for (i=0; i<m; i++){ 2488 v = aa + ai[i] + 1; v1=v; 2489 vj = aj + ai[i] + 1; vj1=vj; 2490 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2491 sum = t[i]; 2492 ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr); 2493 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2494 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 2495 while (nz--) t[*vj++] -= x[i]*(*v++); 2496 } 2497 } 2498 2499 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2500 /* 2501 backward sweep: 2502 b = b - x[i]*U^T(i,:), i=0,...,n-2 2503 for i=m-1,...,0: 2504 sum[i] = (b[i] - U(i,:)x )/d[i]; 2505 x[i] = (1-omega)x[i] + omega*sum[i]; 2506 */ 2507 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2508 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2509 2510 for (i=0; i<m-1; i++){ /* update rhs */ 2511 v = aa + ai[i] + 1; 2512 vj = aj + ai[i] + 1; 2513 nz = ai[i+1] - ai[i] - 1; 2514 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 2515 while (nz--) t[*vj++] -= x[i]*(*v++); 2516 } 2517 for (i=m-1; i>=0; i--){ 2518 v = aa + ai[i] + 1; 2519 vj = aj + ai[i] + 1; 2520 nz = ai[i+1] - ai[i] - 1; 2521 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 2522 sum = t[i]; 2523 while (nz--) sum -= x[*vj++]*(*v++); 2524 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 2525 } 2526 } 2527 } 2528 2529 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2530 if (bb != xx) { 2531 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2532 } 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2538 /*@ 2539 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2540 (upper triangular entries in CSR format) provided by the user. 2541 2542 Collective on MPI_Comm 2543 2544 Input Parameters: 2545 + comm - must be an MPI communicator of size 1 2546 . bs - size of block 2547 . m - number of rows 2548 . n - number of columns 2549 . i - row indices 2550 . j - column indices 2551 - a - matrix values 2552 2553 Output Parameter: 2554 . mat - the matrix 2555 2556 Level: intermediate 2557 2558 Notes: 2559 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2560 once the matrix is destroyed 2561 2562 You cannot set new nonzero locations into this matrix, that will generate an error. 2563 2564 The i and j indices are 0 based 2565 2566 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2567 2568 @*/ 2569 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2570 { 2571 PetscErrorCode ierr; 2572 PetscInt ii; 2573 Mat_SeqSBAIJ *sbaij; 2574 2575 PetscFunctionBegin; 2576 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2577 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2578 2579 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2580 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2581 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2582 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2583 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2584 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2585 2586 sbaij->i = i; 2587 sbaij->j = j; 2588 sbaij->a = a; 2589 sbaij->singlemalloc = PETSC_FALSE; 2590 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2591 sbaij->free_a = PETSC_FALSE; 2592 sbaij->free_ij = PETSC_FALSE; 2593 2594 for (ii=0; ii<m; ii++) { 2595 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2596 #if defined(PETSC_USE_DEBUG) 2597 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2598 #endif 2599 } 2600 #if defined(PETSC_USE_DEBUG) 2601 for (ii=0; ii<sbaij->i[m]; ii++) { 2602 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2603 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2604 } 2605 #endif 2606 2607 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2608 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 2613 2614 2615 2616